|  |  | 
|  |  | 
|  | <!DOCTYPE html> | 
|  |  | 
|  |  | 
|  | <html lang="en"> | 
|  |  | 
|  | <head> | 
|  | <meta charset="utf-8" /> | 
|  | <meta name="viewport" content="width=device-width, initial-scale=1.0" /> | 
|  | <title>pyspark.ml.clustering — PySpark 4.0.0-preview2 documentation</title> | 
|  |  | 
|  |  | 
|  |  | 
|  | <!-- Copy the saved "mode"/"theme" values from localStorage onto the root element before the stylesheets below --> | 
|  | <script data-cfasync="false"> | 
|  | document.documentElement.dataset.mode = localStorage.getItem("mode") || ""; | 
|  | document.documentElement.dataset.theme = localStorage.getItem("theme") || "light"; | 
|  | </script> | 
|  |  | 
|  | <!-- Loaded before other Sphinx assets --> | 
|  | <link href="../../../_static/styles/theme.css?digest=e353d410970836974a52" rel="stylesheet" /> | 
|  | <link href="../../../_static/styles/bootstrap.css?digest=e353d410970836974a52" rel="stylesheet" /> | 
|  | <link href="../../../_static/styles/pydata-sphinx-theme.css?digest=e353d410970836974a52" rel="stylesheet" /> | 
|  |  | 
|  |  | 
|  | <!-- Font Awesome stylesheet plus preloads for its three web fonts --> | 
|  | <link href="../../../_static/vendor/fontawesome/6.1.2/css/all.min.css?digest=e353d410970836974a52" rel="stylesheet" /> | 
|  | <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.woff2" /> | 
|  | <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.woff2" /> | 
|  | <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.woff2" /> | 
|  |  | 
|  | <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" /> | 
|  | <link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" /> | 
|  | <link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" /> | 
|  |  | 
|  | <!-- Pre-loaded scripts that we'll load fully later --> | 
|  | <link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52" /> | 
|  | <link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52" /> | 
|  |  | 
|  | <script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script> | 
|  | <script src="../../../_static/jquery.js"></script> | 
|  | <script src="../../../_static/underscore.js"></script> | 
|  | <script src="../../../_static/doctools.js"></script> | 
|  | <script src="../../../_static/clipboard.min.js"></script> | 
|  | <script src="../../../_static/copybutton.js"></script> | 
|  | <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script> | 
|  | <script>DOCUMENTATION_OPTIONS.pagename = '_modules/pyspark/ml/clustering';</script> | 
|  | <link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/_modules/pyspark/ml/clustering.html" /> | 
|  | <link rel="search" title="Search" href="../../../search.html" /> | 
|  | <!-- The viewport meta is declared once, near the top of this head; the duplicate that used to sit here was removed --> | 
|  | <!-- Page language for docsearch; previously the unrendered template value "None" --> | 
|  | <meta name="docsearch:language" content="en"> | 
|  |  | 
|  |  | 
|  | <!-- Matomo --> | 
|  | <script type="text/javascript"> | 
|  | var _paq = window._paq = window._paq || []; | 
|  | /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ | 
|  | _paq.push(["disableCookies"]); | 
|  | _paq.push(['trackPageView']); | 
|  | _paq.push(['enableLinkTracking']); | 
|  | (function() { | 
|  | var u="https://analytics.apache.org/"; | 
|  | _paq.push(['setTrackerUrl', u+'matomo.php']); | 
|  | _paq.push(['setSiteId', '40']); | 
|  | var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0]; | 
|  | g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s); | 
|  | })(); | 
|  | </script> | 
|  | <!-- End Matomo Code --> | 
|  |  | 
|  | </head> | 
|  |  | 
|  |  | 
|  | <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode=""> | 
|  |  | 
|  |  | 
|  |  | 
|  | <a class="skip-link" href="#main-content">Skip to main content</a> | 
|  |  | 
|  | <input type="checkbox" | 
|  | class="sidebar-toggle" | 
|  | name="__primary" | 
|  | id="__primary"/> | 
|  | <label class="overlay overlay-primary" for="__primary"></label> | 
|  |  | 
|  | <input type="checkbox" | 
|  | class="sidebar-toggle" | 
|  | name="__secondary" | 
|  | id="__secondary"/> | 
|  | <label class="overlay overlay-secondary" for="__secondary"></label> | 
|  |  | 
|  | <!-- Search form wrapper: the form sends the query as "q" to search.html via GET --> | 
|  | <div class="search-button__wrapper"> | 
|  |   <div class="search-button__overlay"></div> | 
|  |   <div class="search-button__search-container"> | 
|  |     <form class="bd-search d-flex align-items-center" | 
|  |           action="../../../search.html" | 
|  |           method="get"> | 
|  |       <!-- Decorative icon: hidden from assistive technology, the input carries the accessible name --> | 
|  |       <i class="fa-solid fa-magnifying-glass" aria-hidden="true"></i> | 
|  |       <input type="search" | 
|  |              class="form-control" | 
|  |              name="q" | 
|  |              id="search-input" | 
|  |              placeholder="Search the docs ..." | 
|  |              aria-label="Search the docs ..." | 
|  |              autocomplete="off" | 
|  |              autocorrect="off" | 
|  |              autocapitalize="off" | 
|  |              spellcheck="false"/> | 
|  |       <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span> | 
|  |     </form> | 
|  |   </div> | 
|  | </div> | 
|  |  | 
|  | <nav class="bd-header navbar navbar-expand-lg bd-navbar"> | 
|  | <div class="bd-header__inner bd-page-width"> | 
|  | <label class="sidebar-toggle primary-toggle" for="__primary"> | 
|  | <span class="fa-solid fa-bars"></span> | 
|  | </label> | 
|  |  | 
|  | <div class="navbar-header-items__start"> | 
|  |  | 
|  | <div class="navbar-item"> | 
|  |  | 
|  |  | 
|  | <a class="navbar-brand logo" href="../../../index.html"> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | <img src="https://spark.apache.org/images/spark-logo.png" class="logo__image only-light" alt="Logo image"/> | 
|  | <script>document.write(`<img src="https://spark.apache.org/images/spark-logo-rev.svg" class="logo__image only-dark" alt="Logo image"/>`);</script> | 
|  |  | 
|  |  | 
|  | </a></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  | <div class="col-lg-9 navbar-header-items"> | 
|  |  | 
|  | <div class="me-auto navbar-header-items__center"> | 
|  |  | 
|  | <div class="navbar-item"><nav class="navbar-nav"> | 
|  | <p class="sidebar-header-items__title" | 
|  | role="heading" | 
|  | aria-level="1" | 
|  | aria-label="Site Navigation"> | 
|  | Site Navigation | 
|  | </p> | 
|  | <ul class="bd-navbar-elements navbar-nav"> | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../index.html"> | 
|  | Overview | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../getting_started/index.html"> | 
|  | Getting Started | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../user_guide/index.html"> | 
|  | User Guides | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../reference/index.html"> | 
|  | API Reference | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../development/index.html"> | 
|  | Development | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../migration_guide/index.html"> | 
|  | Migration Guides | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  | </ul> | 
|  | </nav></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  | <div class="navbar-header-items__end"> | 
|  |  | 
|  | <div class="navbar-item navbar-persistent--container"> | 
|  |  | 
|  | <script> | 
|  | document.write(` | 
|  | <button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> | 
|  | <i class="fa-solid fa-magnifying-glass"></i> | 
|  | </button> | 
|  | `); | 
|  | </script> | 
|  | </div> | 
|  |  | 
|  |  | 
|  | <div class="navbar-item"><!-- | 
|  | Licensed to the Apache Software Foundation (ASF) under one or more | 
|  | contributor license agreements.  See the NOTICE file distributed with | 
|  | this work for additional information regarding copyright ownership. | 
|  | The ASF licenses this file to You under the Apache License, Version 2.0 | 
|  | (the "License"); you may not use this file except in compliance with | 
|  | the License.  You may obtain a copy of the License at | 
|  |  | 
|  | http://www.apache.org/licenses/LICENSE-2.0 | 
|  |  | 
|  | Unless required by applicable law or agreed to in writing, software | 
|  | distributed under the License is distributed on an "AS IS" BASIS, | 
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | See the License for the specific language governing permissions and | 
|  | limitations under the License. | 
|  | --> | 
|  |  | 
|  | <div id="version-button" class="dropdown"> | 
|  | <button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown"> | 
|  | 4.0.0-preview2 | 
|  | <span class="caret"></span> | 
|  | </button> | 
|  | <div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button"> | 
|  | <!-- dropdown will be populated by javascript on page load --> | 
|  | </div> | 
|  | </div> | 
|  |  | 
|  | <script type="text/javascript"> | 
|  | // Function to construct the target URL from the JSON components | 
|  | function buildURL(entry) { | 
|  |   // Fill the {version} placeholder of the docs-homepage pattern with entry.version. | 
|  |   const urlPattern = "https://spark.apache.org/docs/{version}/api/python/index.html";  // supplied by jinja | 
|  |   return urlPattern.replace("{version}", entry.version); | 
|  | } | 
|  |  | 
|  | // Function to check if corresponding page path exists in other version of docs | 
|  | // and, if so, go there instead of the homepage of the other docs version | 
|  | function checkPageExistsAndRedirect(event) { | 
|  |   // Click handler for a version link. Sends a HEAD request for this page's path | 
|  |   // under the other docs version; navigates there on success, otherwise to the | 
|  |   // link's own href. Always returns false so the default navigation is suppressed. | 
|  |   const currentFilePath = "_modules/pyspark/ml/clustering.html"; | 
|  |   const link = event.currentTarget || event.target; | 
|  |   const otherDocsHomepage = link.getAttribute("href"); | 
|  |   // The href points at ".../index.html"; appending the page path to it directly | 
|  |   // yields ".../index.html_modules/..." which can never exist. Strip the file | 
|  |   // name so the path is resolved against the docs root instead. | 
|  |   let docsRoot = otherDocsHomepage.replace(/index\.html$/, ""); | 
|  |   if (!docsRoot.endsWith("/")) { | 
|  |     docsRoot += "/"; | 
|  |   } | 
|  |   const tryUrl = `${docsRoot}${currentFilePath}`; | 
|  |   $.ajax({ | 
|  |     type: 'HEAD', | 
|  |     url: tryUrl, | 
|  |     // if the page exists, go there | 
|  |     success: function() { | 
|  |       location.href = tryUrl; | 
|  |     } | 
|  |   }).fail(function() { | 
|  |     // page missing (or request failed): fall back to the other version's homepage | 
|  |     location.href = otherDocsHomepage; | 
|  |   }); | 
|  |   return false; | 
|  | } | 
|  |  | 
|  | // Function to populate the version switcher | 
|  | (function () { | 
|  |   // This template is rendered more than once on the page with the same element | 
|  |   // ids, so a global "#version_switcher" lookup always hits the first copy. | 
|  |   // Resolve the menu that sits next to THIS script instead. currentScript is | 
|  |   // only available during synchronous execution, so capture it before the | 
|  |   // asynchronous JSON callback runs. | 
|  |   const thisScript = document.currentScript; | 
|  |   const ownMenu = thisScript && thisScript.parentNode | 
|  |     ? thisScript.parentNode.querySelector("#version_switcher") | 
|  |     : null; | 
|  |   // get JSON config | 
|  |   $.getJSON("https://spark.apache.org/static/versions.json", function(data) { | 
|  |     // fall back to the global lookup if the scoped one found nothing | 
|  |     const $menu = ownMenu ? $(ownMenu) : $("#version_switcher"); | 
|  |     // create the nodes first (before AJAX calls) to ensure the order is | 
|  |     // correct (for now, links will go to doc version homepage) | 
|  |     $.each(data, function(index, entry) { | 
|  |       // if no custom name specified (e.g., "latest"), use version string | 
|  |       if (!("name" in entry)) { | 
|  |         entry.name = entry.version; | 
|  |       } | 
|  |       // construct the appropriate URL, and add it to the dropdown | 
|  |       entry.url = buildURL(entry); | 
|  |       const node = document.createElement("a"); | 
|  |       node.setAttribute("class", "list-group-item list-group-item-action py-1"); | 
|  |       node.setAttribute("href", `${entry.url}`); | 
|  |       node.textContent = `${entry.name}`; | 
|  |       node.onclick = checkPageExistsAndRedirect; | 
|  |       $menu.append(node); | 
|  |     }); | 
|  |   }); | 
|  | })(); | 
|  | </script></div> | 
|  |  | 
|  | <div class="navbar-item"> | 
|  | <script> | 
|  | document.write(` | 
|  | <button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip"> | 
|  | <span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span> | 
|  | <span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span> | 
|  | <span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span> | 
|  | </button> | 
|  | `); | 
|  | </script></div> | 
|  |  | 
|  | <div class="navbar-item"><ul class="navbar-icon-links navbar-nav" | 
|  | aria-label="Icon Links"> | 
|  | <li class="nav-item"> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | <a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github"></i></span> | 
|  | <label class="sr-only">GitHub</label></a> | 
|  | </li> | 
|  | <li class="nav-item"> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | <a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box"></i></span> | 
|  | <label class="sr-only">PyPI</label></a> | 
|  | </li> | 
|  | </ul></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  | <div class="navbar-persistent--mobile"> | 
|  | <script> | 
|  | document.write(` | 
|  | <button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> | 
|  | <i class="fa-solid fa-magnifying-glass"></i> | 
|  | </button> | 
|  | `); | 
|  | </script> | 
|  | </div> | 
|  |  | 
|  |  | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </nav> | 
|  |  | 
|  | <div class="bd-container"> | 
|  | <div class="bd-container__inner bd-page-width"> | 
|  |  | 
|  | <div class="bd-sidebar-primary bd-sidebar hide-on-wide"> | 
|  |  | 
|  |  | 
|  |  | 
|  | <div class="sidebar-header-items sidebar-primary__section"> | 
|  |  | 
|  |  | 
|  | <div class="sidebar-header-items__center"> | 
|  |  | 
|  | <div class="navbar-item"><nav class="navbar-nav"> | 
|  | <p class="sidebar-header-items__title" | 
|  | role="heading" | 
|  | aria-level="1" | 
|  | aria-label="Site Navigation"> | 
|  | Site Navigation | 
|  | </p> | 
|  | <ul class="bd-navbar-elements navbar-nav"> | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../index.html"> | 
|  | Overview | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../getting_started/index.html"> | 
|  | Getting Started | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../user_guide/index.html"> | 
|  | User Guides | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../reference/index.html"> | 
|  | API Reference | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../development/index.html"> | 
|  | Development | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../migration_guide/index.html"> | 
|  | Migration Guides | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  | </ul> | 
|  | </nav></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  |  | 
|  | <div class="sidebar-header-items__end"> | 
|  |  | 
|  | <div class="navbar-item"><!-- | 
|  | Licensed to the Apache Software Foundation (ASF) under one or more | 
|  | contributor license agreements.  See the NOTICE file distributed with | 
|  | this work for additional information regarding copyright ownership. | 
|  | The ASF licenses this file to You under the Apache License, Version 2.0 | 
|  | (the "License"); you may not use this file except in compliance with | 
|  | the License.  You may obtain a copy of the License at | 
|  |  | 
|  | http://www.apache.org/licenses/LICENSE-2.0 | 
|  |  | 
|  | Unless required by applicable law or agreed to in writing, software | 
|  | distributed under the License is distributed on an "AS IS" BASIS, | 
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | See the License for the specific language governing permissions and | 
|  | limitations under the License. | 
|  | --> | 
|  |  | 
|  | <div id="version-button" class="dropdown"> | 
|  | <button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown"> | 
|  | 4.0.0-preview2 | 
|  | <span class="caret"></span> | 
|  | </button> | 
|  | <div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button"> | 
|  | <!-- dropdown will be populated by javascript on page load --> | 
|  | </div> | 
|  | </div> | 
|  |  | 
|  | <script type="text/javascript"> | 
|  | // Function to construct the target URL from the JSON components | 
|  | function buildURL(entry) { | 
|  |   // Fill the {version} placeholder of the docs-homepage pattern with entry.version. | 
|  |   const urlPattern = "https://spark.apache.org/docs/{version}/api/python/index.html";  // supplied by jinja | 
|  |   return urlPattern.replace("{version}", entry.version); | 
|  | } | 
|  |  | 
|  | // Function to check if corresponding page path exists in other version of docs | 
|  | // and, if so, go there instead of the homepage of the other docs version | 
|  | function checkPageExistsAndRedirect(event) { | 
|  |   // Click handler for a version link. Sends a HEAD request for this page's path | 
|  |   // under the other docs version; navigates there on success, otherwise to the | 
|  |   // link's own href. Always returns false so the default navigation is suppressed. | 
|  |   const currentFilePath = "_modules/pyspark/ml/clustering.html"; | 
|  |   const link = event.currentTarget || event.target; | 
|  |   const otherDocsHomepage = link.getAttribute("href"); | 
|  |   // The href points at ".../index.html"; appending the page path to it directly | 
|  |   // yields ".../index.html_modules/..." which can never exist. Strip the file | 
|  |   // name so the path is resolved against the docs root instead. | 
|  |   let docsRoot = otherDocsHomepage.replace(/index\.html$/, ""); | 
|  |   if (!docsRoot.endsWith("/")) { | 
|  |     docsRoot += "/"; | 
|  |   } | 
|  |   const tryUrl = `${docsRoot}${currentFilePath}`; | 
|  |   $.ajax({ | 
|  |     type: 'HEAD', | 
|  |     url: tryUrl, | 
|  |     // if the page exists, go there | 
|  |     success: function() { | 
|  |       location.href = tryUrl; | 
|  |     } | 
|  |   }).fail(function() { | 
|  |     // page missing (or request failed): fall back to the other version's homepage | 
|  |     location.href = otherDocsHomepage; | 
|  |   }); | 
|  |   return false; | 
|  | } | 
|  |  | 
|  | // Function to populate the version switcher | 
|  | (function () { | 
|  |   // This template is rendered more than once on the page with the same element | 
|  |   // ids, so a global "#version_switcher" lookup always hits the first copy and | 
|  |   // this copy's menu would stay empty. Resolve the menu that sits next to THIS | 
|  |   // script instead. currentScript is only available during synchronous | 
|  |   // execution, so capture it before the asynchronous JSON callback runs. | 
|  |   const thisScript = document.currentScript; | 
|  |   const ownMenu = thisScript && thisScript.parentNode | 
|  |     ? thisScript.parentNode.querySelector("#version_switcher") | 
|  |     : null; | 
|  |   // get JSON config | 
|  |   $.getJSON("https://spark.apache.org/static/versions.json", function(data) { | 
|  |     // fall back to the global lookup if the scoped one found nothing | 
|  |     const $menu = ownMenu ? $(ownMenu) : $("#version_switcher"); | 
|  |     // create the nodes first (before AJAX calls) to ensure the order is | 
|  |     // correct (for now, links will go to doc version homepage) | 
|  |     $.each(data, function(index, entry) { | 
|  |       // if no custom name specified (e.g., "latest"), use version string | 
|  |       if (!("name" in entry)) { | 
|  |         entry.name = entry.version; | 
|  |       } | 
|  |       // construct the appropriate URL, and add it to the dropdown | 
|  |       entry.url = buildURL(entry); | 
|  |       const node = document.createElement("a"); | 
|  |       node.setAttribute("class", "list-group-item list-group-item-action py-1"); | 
|  |       node.setAttribute("href", `${entry.url}`); | 
|  |       node.textContent = `${entry.name}`; | 
|  |       node.onclick = checkPageExistsAndRedirect; | 
|  |       $menu.append(node); | 
|  |     }); | 
|  |   }); | 
|  | })(); | 
|  | </script></div> | 
|  |  | 
|  | <div class="navbar-item"> | 
|  | <script> | 
|  | document.write(` | 
|  | <button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip"> | 
|  | <span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span> | 
|  | <span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span> | 
|  | <span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span> | 
|  | </button> | 
|  | `); | 
|  | </script></div> | 
|  |  | 
|  | <div class="navbar-item"><ul class="navbar-icon-links navbar-nav" | 
|  | aria-label="Icon Links"> | 
|  | <li class="nav-item"> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | <a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github"></i></span> | 
|  | <label class="sr-only">GitHub</label></a> | 
|  | </li> | 
|  | <li class="nav-item"> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | <a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box"></i></span> | 
|  | <label class="sr-only">PyPI</label></a> | 
|  | </li> | 
|  | </ul></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  | <div class="sidebar-primary-items__end sidebar-primary__section"> | 
|  | </div> | 
|  |  | 
|  | <div id="rtd-footer-container"></div> | 
|  |  | 
|  |  | 
|  | </div> | 
|  |  | 
|  | <main id="main-content" class="bd-main"> | 
|  |  | 
|  |  | 
|  | <div class="bd-content"> | 
|  | <div class="bd-article-container"> | 
|  |  | 
|  | <div class="bd-header-article"> | 
|  | <div class="header-article-items header-article__inner"> | 
|  |  | 
|  | <div class="header-article-items__start"> | 
|  |  | 
|  | <div class="header-article-item"> | 
|  |  | 
|  |  | 
|  |  | 
|  | <!-- Breadcrumb trail: the <nav> is the only landmark; the list keeps its native list semantics --> | 
|  | <nav aria-label="Breadcrumbs"> | 
|  |   <ul class="bd-breadcrumbs"> | 
|  |  | 
|  |     <li class="breadcrumb-item breadcrumb-home"> | 
|  |       <a href="../../../index.html" class="nav-link" aria-label="Home"> | 
|  |         <!-- Decorative icon: the link's aria-label provides the accessible name --> | 
|  |         <i class="fa-solid fa-home" aria-hidden="true"></i> | 
|  |       </a> | 
|  |     </li> | 
|  |  | 
|  |     <li class="breadcrumb-item"><a href="../../index.html" class="nav-link">Module code</a></li> | 
|  |  | 
|  |     <li class="breadcrumb-item active" aria-current="page">pyspark.ml.clustering</li> | 
|  |   </ul> | 
|  | </nav> | 
|  | </div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  | </div> | 
|  | </div> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | <div id="searchbox"></div> | 
|  | <article class="bd-article" role="main"> | 
|  |  | 
|  | <h1>Source code for pyspark.ml.clustering</h1><div class="highlight"><pre> | 
|  | <span></span><span class="c1">#</span> | 
|  | <span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span> | 
|  | <span class="c1"># contributor license agreements.  See the NOTICE file distributed with</span> | 
|  | <span class="c1"># this work for additional information regarding copyright ownership.</span> | 
|  | <span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span> | 
|  | <span class="c1"># (the "License"); you may not use this file except in compliance with</span> | 
|  | <span class="c1"># the License.  You may obtain a copy of the License at</span> | 
|  | <span class="c1">#</span> | 
|  | <span class="c1">#    http://www.apache.org/licenses/LICENSE-2.0</span> | 
|  | <span class="c1">#</span> | 
|  | <span class="c1"># Unless required by applicable law or agreed to in writing, software</span> | 
|  | <span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span> | 
|  | <span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> | 
|  | <span class="c1"># See the License for the specific language governing permissions and</span> | 
|  | <span class="c1"># limitations under the License.</span> | 
|  | <span class="c1">#</span> | 
|  |  | 
|  | <span class="kn">import</span> <span class="nn">sys</span> | 
|  | <span class="kn">import</span> <span class="nn">warnings</span> | 
|  | <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">TYPE_CHECKING</span> | 
|  |  | 
|  | <span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span> | 
|  |  | 
|  | <span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">since</span><span class="p">,</span> <span class="n">keyword_only</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.ml.param.shared</span> <span class="kn">import</span> <span class="p">(</span> | 
|  | <span class="n">HasMaxIter</span><span class="p">,</span> | 
|  | <span class="n">HasFeaturesCol</span><span class="p">,</span> | 
|  | <span class="n">HasSeed</span><span class="p">,</span> | 
|  | <span class="n">HasPredictionCol</span><span class="p">,</span> | 
|  | <span class="n">HasAggregationDepth</span><span class="p">,</span> | 
|  | <span class="n">HasWeightCol</span><span class="p">,</span> | 
|  | <span class="n">HasTol</span><span class="p">,</span> | 
|  | <span class="n">HasProbabilityCol</span><span class="p">,</span> | 
|  | <span class="n">HasDistanceMeasure</span><span class="p">,</span> | 
|  | <span class="n">HasCheckpointInterval</span><span class="p">,</span> | 
|  | <span class="n">HasSolver</span><span class="p">,</span> | 
|  | <span class="n">HasMaxBlockSizeInMB</span><span class="p">,</span> | 
|  | <span class="n">Param</span><span class="p">,</span> | 
|  | <span class="n">Params</span><span class="p">,</span> | 
|  | <span class="n">TypeConverters</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.ml.util</span> <span class="kn">import</span> <span class="p">(</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">,</span> | 
|  | <span class="n">GeneralJavaMLWritable</span><span class="p">,</span> | 
|  | <span class="n">HasTrainingSummary</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.ml.wrapper</span> <span class="kn">import</span> <span class="n">JavaEstimator</span><span class="p">,</span> <span class="n">JavaModel</span><span class="p">,</span> <span class="n">JavaParams</span><span class="p">,</span> <span class="n">JavaWrapper</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.ml.common</span> <span class="kn">import</span> <span class="n">inherit_doc</span><span class="p">,</span> <span class="n">_java2py</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.ml.stat</span> <span class="kn">import</span> <span class="n">MultivariateGaussian</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">DataFrame</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.ml.linalg</span> <span class="kn">import</span> <span class="n">Vector</span><span class="p">,</span> <span class="n">Matrix</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.ml._typing</span> <span class="kn">import</span> <span class="n">M</span> | 
|  | <span class="kn">from</span> <span class="nn">py4j.java_gateway</span> <span class="kn">import</span> <span class="n">JavaObject</span> | 
|  |  | 
|  |  | 
|  | <span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span> | 
|  | <span class="s2">"BisectingKMeans"</span><span class="p">,</span> | 
|  | <span class="s2">"BisectingKMeansModel"</span><span class="p">,</span> | 
|  | <span class="s2">"BisectingKMeansSummary"</span><span class="p">,</span> | 
|  | <span class="s2">"KMeans"</span><span class="p">,</span> | 
|  | <span class="s2">"KMeansModel"</span><span class="p">,</span> | 
|  | <span class="s2">"KMeansSummary"</span><span class="p">,</span> | 
|  | <span class="s2">"GaussianMixture"</span><span class="p">,</span> | 
|  | <span class="s2">"GaussianMixtureModel"</span><span class="p">,</span> | 
|  | <span class="s2">"GaussianMixtureSummary"</span><span class="p">,</span> | 
|  | <span class="s2">"LDA"</span><span class="p">,</span> | 
|  | <span class="s2">"LDAModel"</span><span class="p">,</span> | 
|  | <span class="s2">"LocalLDAModel"</span><span class="p">,</span> | 
|  | <span class="s2">"DistributedLDAModel"</span><span class="p">,</span> | 
|  | <span class="s2">"PowerIterationClustering"</span><span class="p">,</span> | 
|  | <span class="p">]</span> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">ClusteringSummary</span><span class="p">(</span><span class="n">JavaWrapper</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Clustering results for a given model.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.1.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">predictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Name for column of predicted clusters in `predictions`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"predictionCol"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">predictions</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        DataFrame produced by the model's `transform` method.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"predictions"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">featuresCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Name for column of features in `predictions`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"featuresCol"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">k</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        The number of clusters the model was trained with.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"k"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">cluster</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        DataFrame of predicted cluster centers for each training data point.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"cluster"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">clusterSizes</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Size of (number of data points in) each cluster.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"clusterSizes"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">numIter</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Number of iterations.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"numIter"</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">_GaussianMixtureParams</span><span class="p">(</span> | 
|  | <span class="n">HasMaxIter</span><span class="p">,</span> | 
|  | <span class="n">HasFeaturesCol</span><span class="p">,</span> | 
|  | <span class="n">HasSeed</span><span class="p">,</span> | 
|  | <span class="n">HasPredictionCol</span><span class="p">,</span> | 
|  | <span class="n">HasProbabilityCol</span><span class="p">,</span> | 
|  | <span class="n">HasTol</span><span class="p">,</span> | 
|  | <span class="n">HasAggregationDepth</span><span class="p">,</span> | 
|  | <span class="n">HasWeightCol</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`GaussianMixture` and :py:class:`GaussianMixtureModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">k</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"k"</span><span class="p">,</span> | 
|  | <span class="s2">"Number of independent Gaussians in the mixture model. "</span> <span class="o">+</span> <span class="s2">"Must be > 1."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_GaussianMixtureParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">k</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">tol</span><span class="o">=</span><span class="mf">0.01</span><span class="p">,</span> <span class="n">maxIter</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> <span class="n">aggregationDepth</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getK</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of `k`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">k</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixtureModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixtureModel.html#pyspark.ml.clustering.GaussianMixtureModel">[docs]</a><span class="k">class</span> <span class="nc">GaussianMixtureModel</span><span class="p">(</span> | 
|  | <span class="n">JavaModel</span><span class="p">,</span> | 
|  | <span class="n">_GaussianMixtureParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"GaussianMixtureModel"</span><span class="p">],</span> | 
|  | <span class="n">HasTrainingSummary</span><span class="p">[</span><span class="s2">"GaussianMixtureSummary"</span><span class="p">],</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by GaussianMixture.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixtureModel.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixtureModel.html#pyspark.ml.clustering.GaussianMixtureModel.setFeaturesCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixtureModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixtureModel.setPredictionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixtureModel.html#pyspark.ml.clustering.GaussianMixtureModel.setPredictionCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setPredictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixtureModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`predictionCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">predictionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixtureModel.setProbabilityCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixtureModel.html#pyspark.ml.clustering.GaussianMixtureModel.setProbabilityCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setProbabilityCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixtureModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`probabilityCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">probabilityCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">weights</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Weight for each Gaussian distribution in the mixture.</span> | 
|  | <span class="sd">        This is a multinomial probability distribution over the k Gaussians,</span> | 
|  | <span class="sd">        where weights[i] is the weight for Gaussian i, and weights sum to 1.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"weights"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">gaussians</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">MultivariateGaussian</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Array of :py:class:`MultivariateGaussian` where gaussians[i] represents</span> | 
|  | <span class="sd">        the Multivariate Gaussian (Normal) Distribution for Gaussian i.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.core.context</span> <span class="kn">import</span> <span class="n">SparkContext</span> | 
|  |  | 
|  | <span class="n">sc</span> <span class="o">=</span> <span class="n">SparkContext</span><span class="o">.</span><span class="n">_active_spark_context</span> | 
|  | <span class="k">assert</span> <span class="n">sc</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> | 
|  |  | 
|  | <span class="n">jgaussians</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span><span class="o">.</span><span class="n">gaussians</span><span class="p">()</span> | 
|  | <span class="k">return</span> <span class="p">[</span> | 
|  | <span class="n">MultivariateGaussian</span><span class="p">(</span><span class="n">_java2py</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">jgaussian</span><span class="o">.</span><span class="n">mean</span><span class="p">()),</span> <span class="n">_java2py</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">jgaussian</span><span class="o">.</span><span class="n">cov</span><span class="p">()))</span> | 
|  | <span class="k">for</span> <span class="n">jgaussian</span> <span class="ow">in</span> <span class="n">jgaussians</span> | 
|  | <span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">gaussiansDF</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Retrieve Gaussian distributions as a DataFrame.</span> | 
|  | <span class="sd">        Each row represents a Gaussian Distribution.</span> | 
|  | <span class="sd">        The DataFrame has two columns: mean (Vector) and cov (Matrix).</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"gaussiansDF"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">summary</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixtureSummary"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets summary (cluster assignments, cluster sizes) of the model trained on the</span> | 
|  | <span class="sd">        training set. An exception is thrown if no summary exists.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">hasSummary</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">GaussianMixtureSummary</span><span class="p">(</span><span class="nb">super</span><span class="p">(</span><span class="n">GaussianMixtureModel</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">summary</span><span class="p">)</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span> | 
|  | <span class="s2">"No training summary available for this </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixtureModel.predict"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixtureModel.html#pyspark.ml.clustering.GaussianMixtureModel.predict">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">Vector</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Predict label for the given features.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"predict"</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixtureModel.predictProbability"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixtureModel.html#pyspark.ml.clustering.GaussianMixtureModel.predictProbability">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">predictProbability</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">Vector</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Predict probability for the given features.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"predictProbability"</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixture"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixture.html#pyspark.ml.clustering.GaussianMixture">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">GaussianMixture</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="n">GaussianMixtureModel</span><span class="p">],</span> | 
|  | <span class="n">_GaussianMixtureParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"GaussianMixture"</span><span class="p">],</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    GaussianMixture clustering.</span> | 
|  | <span class="sd">    This class performs expectation maximization for multivariate Gaussian</span> | 
|  | <span class="sd">    Mixture Models (GMMs).  A GMM represents a composite distribution of</span> | 
|  | <span class="sd">    independent Gaussian distributions with associated "mixing" weights</span> | 
|  | <span class="sd">    specifying each one's contribution to the composite.</span> | 
|  |  | 
|  | <span class="sd">    Given a set of sample points, this class will maximize the log-likelihood</span> | 
|  | <span class="sd">    for a mixture of k Gaussians, iterating until the log-likelihood changes by</span> | 
|  | <span class="sd">    less than the convergence tolerance (`tol`), or until it has reached the max number of iterations.</span> | 
|  | <span class="sd">    While this process is generally guaranteed to converge, it is not guaranteed</span> | 
|  | <span class="sd">    to find a global optimum.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.0.0</span> | 
|  |  | 
|  | <span class="sd">    Notes</span> | 
|  | <span class="sd">    -----</span> | 
|  | <span class="sd">    For high-dimensional data (with many features), this algorithm may perform poorly.</span> | 
|  | <span class="sd">    This is due to high-dimensional data (a) making it difficult to cluster at all</span> | 
|  | <span class="sd">    (based on statistical/theoretical arguments) and (b) numerical issues with</span> | 
|  | <span class="sd">    Gaussian distributions.</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  |  | 
|  | <span class="sd">    >>> data = [(Vectors.dense([-0.1, -0.05 ]),),</span> | 
|  | <span class="sd">    ...         (Vectors.dense([-0.01, -0.1]),),</span> | 
|  | <span class="sd">    ...         (Vectors.dense([0.9, 0.8]),),</span> | 
|  | <span class="sd">    ...         (Vectors.dense([0.75, 0.935]),),</span> | 
|  | <span class="sd">    ...         (Vectors.dense([-0.83, -0.68]),),</span> | 
|  | <span class="sd">    ...         (Vectors.dense([-0.91, -0.76]),)]</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(data, ["features"])</span> | 
|  | <span class="sd">    >>> gm = GaussianMixture(k=3, tol=0.0001, seed=10)</span> | 
|  | <span class="sd">    >>> gm.getMaxIter()</span> | 
|  | <span class="sd">    100</span> | 
|  | <span class="sd">    >>> gm.setMaxIter(30)</span> | 
|  | <span class="sd">    GaussianMixture...</span> | 
|  | <span class="sd">    >>> gm.getMaxIter()</span> | 
|  | <span class="sd">    30</span> | 
|  | <span class="sd">    >>> model = gm.fit(df)</span> | 
|  | <span class="sd">    >>> model.getAggregationDepth()</span> | 
|  | <span class="sd">    2</span> | 
|  | <span class="sd">    >>> model.getFeaturesCol()</span> | 
|  | <span class="sd">    'features'</span> | 
|  | <span class="sd">    >>> model.setPredictionCol("newPrediction")</span> | 
|  | <span class="sd">    GaussianMixtureModel...</span> | 
|  | <span class="sd">    >>> model.predict(df.head().features)</span> | 
|  | <span class="sd">    2</span> | 
|  | <span class="sd">    >>> model.predictProbability(df.head().features)</span> | 
|  | <span class="sd">    DenseVector([0.0, 0.0, 1.0])</span> | 
|  | <span class="sd">    >>> model.hasSummary</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> summary = model.summary</span> | 
|  | <span class="sd">    >>> summary.k</span> | 
|  | <span class="sd">    3</span> | 
|  | <span class="sd">    >>> summary.clusterSizes</span> | 
|  | <span class="sd">    [2, 2, 2]</span> | 
|  | <span class="sd">    >>> weights = model.weights</span> | 
|  | <span class="sd">    >>> len(weights)</span> | 
|  | <span class="sd">    3</span> | 
|  | <span class="sd">    >>> gaussians = model.gaussians</span> | 
|  | <span class="sd">    >>> len(gaussians)</span> | 
|  | <span class="sd">    3</span> | 
|  | <span class="sd">    >>> gaussians[0].mean</span> | 
|  | <span class="sd">    DenseVector([0.825, 0.8675])</span> | 
|  | <span class="sd">    >>> gaussians[0].cov</span> | 
|  | <span class="sd">    DenseMatrix(2, 2, [0.0056, -0.0051, -0.0051, 0.0046], 0)</span> | 
|  | <span class="sd">    >>> gaussians[1].mean</span> | 
|  | <span class="sd">    DenseVector([-0.87, -0.72])</span> | 
|  | <span class="sd">    >>> gaussians[1].cov</span> | 
|  | <span class="sd">    DenseMatrix(2, 2, [0.0016, 0.0016, 0.0016, 0.0016], 0)</span> | 
|  | <span class="sd">    >>> gaussians[2].mean</span> | 
|  | <span class="sd">    DenseVector([-0.055, -0.075])</span> | 
|  | <span class="sd">    >>> gaussians[2].cov</span> | 
|  | <span class="sd">    DenseMatrix(2, 2, [0.002, -0.0011, -0.0011, 0.0006], 0)</span> | 
|  | <span class="sd">    >>> model.gaussiansDF.select("mean").head()</span> | 
|  | <span class="sd">    Row(mean=DenseVector([0.825, 0.8675]))</span> | 
|  | <span class="sd">    >>> model.gaussiansDF.select("cov").head()</span> | 
|  | <span class="sd">    Row(cov=DenseMatrix(2, 2, [0.0056, -0.0051, -0.0051, 0.0046], False))</span> | 
|  | <span class="sd">    >>> transformed = model.transform(df).select("features", "newPrediction")</span> | 
|  | <span class="sd">    >>> rows = transformed.collect()</span> | 
|  | <span class="sd">    >>> rows[4].newPrediction == rows[5].newPrediction</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> rows[2].newPrediction == rows[3].newPrediction</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> gmm_path = temp_path + "/gmm"</span> | 
|  | <span class="sd">    >>> gm.save(gmm_path)</span> | 
|  | <span class="sd">    >>> gm2 = GaussianMixture.load(gmm_path)</span> | 
|  | <span class="sd">    >>> gm2.getK()</span> | 
|  | <span class="sd">    3</span> | 
|  | <span class="sd">    >>> model_path = temp_path + "/gmm_model"</span> | 
|  | <span class="sd">    >>> model.save(model_path)</span> | 
|  | <span class="sd">    >>> model2 = GaussianMixtureModel.load(model_path)</span> | 
|  | <span class="sd">    >>> model2.hasSummary</span> | 
|  | <span class="sd">    False</span> | 
|  | <span class="sd">    >>> model2.weights == model.weights</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> model2.gaussians[0].mean == model.gaussians[0].mean</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> model2.gaussians[0].cov == model.gaussians[0].cov</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> model2.gaussians[1].mean == model.gaussians[1].mean</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> model2.gaussians[1].cov == model.gaussians[1].cov</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> model2.gaussians[2].mean == model.gaussians[2].mean</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> model2.gaussians[2].cov == model.gaussians[2].cov</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> model2.gaussiansDF.select("mean").head()</span> | 
|  | <span class="sd">    Row(mean=DenseVector([0.825, 0.8675]))</span> | 
|  | <span class="sd">    >>> model2.gaussiansDF.select("cov").head()</span> | 
|  | <span class="sd">    Row(cov=DenseMatrix(2, 2, [0.0056, -0.0051, -0.0051, 0.0046], False))</span> | 
|  | <span class="sd">    >>> model.transform(df).take(1) == model2.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> gm2.setWeightCol("weight")</span> | 
|  | <span class="sd">    GaussianMixture...</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> | 
|  | <span class="n">k</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">probabilityCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"probability"</span><span class="p">,</span> | 
|  | <span class="n">tol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.01</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">aggregationDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, featuresCol="features", predictionCol="prediction", k=2, \</span> | 
|  | <span class="sd">                 probabilityCol="probability", tol=0.01, maxIter=100, seed=None, \</span> | 
|  | <span class="sd">                 aggregationDepth=2, weightCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">GaussianMixture</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> | 
|  | <span class="s2">"org.apache.spark.ml.clustering.GaussianMixture"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixtureModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">GaussianMixtureModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixture.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixture.html#pyspark.ml.clustering.GaussianMixture.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> | 
|  | <span class="n">k</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">probabilityCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"probability"</span><span class="p">,</span> | 
|  | <span class="n">tol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.01</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">aggregationDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixture"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, featuresCol="features", predictionCol="prediction", k=2, \</span> | 
|  | <span class="sd">                  probabilityCol="probability", tol=0.01, maxIter=100, seed=None, \</span> | 
|  | <span class="sd">                  aggregationDepth=2, weightCol=None)</span> | 
|  |  | 
|  | <span class="sd">        Sets params for GaussianMixture.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixture.setK"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixture.html#pyspark.ml.clustering.GaussianMixture.setK">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setK</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixture"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`k`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">k</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixture.setMaxIter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixture.html#pyspark.ml.clustering.GaussianMixture.setMaxIter">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMaxIter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixture"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`maxIter`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixture.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixture.html#pyspark.ml.clustering.GaussianMixture.setFeaturesCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixture"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixture.setPredictionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixture.html#pyspark.ml.clustering.GaussianMixture.setPredictionCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setPredictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixture"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`predictionCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">predictionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixture.setProbabilityCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixture.html#pyspark.ml.clustering.GaussianMixture.setProbabilityCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setProbabilityCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixture"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`probabilityCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">probabilityCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixture.setWeightCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixture.html#pyspark.ml.clustering.GaussianMixture.setWeightCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setWeightCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixture"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`weightCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">weightCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixture.setSeed"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixture.html#pyspark.ml.clustering.GaussianMixture.setSeed">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSeed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixture"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`seed`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixture.setTol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixture.html#pyspark.ml.clustering.GaussianMixture.setTol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setTol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixture"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`tol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">tol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixture.setAggregationDepth"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixture.html#pyspark.ml.clustering.GaussianMixture.setAggregationDepth">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setAggregationDepth</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GaussianMixture"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`aggregationDepth`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">aggregationDepth</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="GaussianMixtureSummary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.GaussianMixtureSummary.html#pyspark.ml.clustering.GaussianMixtureSummary">[docs]</a><span class="k">class</span> <span class="nc">GaussianMixtureSummary</span><span class="p">(</span><span class="n">ClusteringSummary</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Gaussian mixture clustering results for a given model.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.1.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">probabilityCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Name for column of predicted probability of each cluster in `predictions`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"probabilityCol"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">probability</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        DataFrame of probabilities of each cluster for each training data point.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"probability"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">logLikelihood</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Total log-likelihood for this model on the given data.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"logLikelihood"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="KMeansSummary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeansSummary.html#pyspark.ml.clustering.KMeansSummary">[docs]</a><span class="k">class</span> <span class="nc">KMeansSummary</span><span class="p">(</span><span class="n">ClusteringSummary</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Summary of KMeans.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.1.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">trainingCost</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        K-means cost (sum of squared distances to the nearest centroid for all points in the</span> | 
|  | <span class="sd">        training dataset). This is equivalent to sklearn's inertia.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"trainingCost"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">_KMeansParams</span><span class="p">(</span> | 
|  | <span class="n">HasMaxIter</span><span class="p">,</span> | 
|  | <span class="n">HasFeaturesCol</span><span class="p">,</span> | 
|  | <span class="n">HasSeed</span><span class="p">,</span> | 
|  | <span class="n">HasPredictionCol</span><span class="p">,</span> | 
|  | <span class="n">HasTol</span><span class="p">,</span> | 
|  | <span class="n">HasDistanceMeasure</span><span class="p">,</span> | 
|  | <span class="n">HasWeightCol</span><span class="p">,</span> | 
|  | <span class="n">HasSolver</span><span class="p">,</span> | 
|  | <span class="n">HasMaxBlockSizeInMB</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`KMeans` and :py:class:`KMeansModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">k</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"k"</span><span class="p">,</span> | 
|  | <span class="s2">"The number of clusters to create. Must be > 1."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">initMode</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"initMode"</span><span class="p">,</span> | 
|  | <span class="s1">'The initialization algorithm. This can be either "random" to '</span> | 
|  | <span class="o">+</span> <span class="s1">'choose random points as initial cluster centers, or "k-means||" '</span> | 
|  | <span class="o">+</span> <span class="s2">"to use a parallel variant of k-means++"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">initSteps</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"initSteps"</span><span class="p">,</span> | 
|  | <span class="s2">"The number of steps for k-means|| "</span> <span class="o">+</span> <span class="s2">"initialization mode. Must be > 0."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">solver</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"solver"</span><span class="p">,</span> | 
|  | <span class="s2">"The solver algorithm for optimization. Supported "</span> <span class="o">+</span> <span class="s2">"options: auto, row, block."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_KMeansParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> | 
|  | <span class="n">k</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">initMode</span><span class="o">=</span><span class="s2">"k-means||"</span><span class="p">,</span> | 
|  | <span class="n">initSteps</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">tol</span><span class="o">=</span><span class="mf">1e-4</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> | 
|  | <span class="n">distanceMeasure</span><span class="o">=</span><span class="s2">"euclidean"</span><span class="p">,</span> | 
|  | <span class="n">solver</span><span class="o">=</span><span class="s2">"auto"</span><span class="p">,</span> | 
|  | <span class="n">maxBlockSizeInMB</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getK</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of `k`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">k</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getInitMode</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of `initMode`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">initMode</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getInitSteps</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of `initSteps`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">initSteps</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="KMeansModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeansModel.html#pyspark.ml.clustering.KMeansModel">[docs]</a><span class="k">class</span> <span class="nc">KMeansModel</span><span class="p">(</span> | 
|  | <span class="n">JavaModel</span><span class="p">,</span> | 
|  | <span class="n">_KMeansParams</span><span class="p">,</span> | 
|  | <span class="n">GeneralJavaMLWritable</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"KMeansModel"</span><span class="p">],</span> | 
|  | <span class="n">HasTrainingSummary</span><span class="p">[</span><span class="s2">"KMeansSummary"</span><span class="p">],</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by KMeans.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.5.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeansModel.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeansModel.html#pyspark.ml.clustering.KMeansModel.setFeaturesCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeansModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeansModel.setPredictionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeansModel.html#pyspark.ml.clustering.KMeansModel.setPredictionCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setPredictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeansModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`predictionCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">predictionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeansModel.clusterCenters"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeansModel.html#pyspark.ml.clustering.KMeansModel.clusterCenters">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">clusterCenters</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""Get the cluster centers, represented as a list of NumPy arrays."""</span> | 
|  | <span class="k">return</span> <span class="p">[</span><span class="n">c</span><span class="o">.</span><span class="n">toArray</span><span class="p">()</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"clusterCenters"</span><span class="p">)]</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">summary</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">KMeansSummary</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets summary (cluster assignments, cluster sizes) of the model trained on the</span> | 
|  | <span class="sd">        training set. An exception is thrown if no summary exists.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">hasSummary</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">KMeansSummary</span><span class="p">(</span><span class="nb">super</span><span class="p">(</span><span class="n">KMeansModel</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">summary</span><span class="p">)</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span> | 
|  | <span class="s2">"No training summary available for this </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeansModel.predict"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeansModel.html#pyspark.ml.clustering.KMeansModel.predict">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">Vector</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Predict label for the given features.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"predict"</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">KMeans</span><span class="p">(</span><span class="n">JavaEstimator</span><span class="p">[</span><span class="n">KMeansModel</span><span class="p">],</span> <span class="n">_KMeansParams</span><span class="p">,</span> <span class="n">JavaMLWritable</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"KMeans"</span><span class="p">]):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    K-means clustering with a k-means++ like initialization mode</span> | 
|  | <span class="sd">    (the k-means|| algorithm by Bahmani et al).</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.5.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> data = [(Vectors.dense([0.0, 0.0]), 2.0), (Vectors.dense([1.0, 1.0]), 2.0),</span> | 
|  | <span class="sd">    ...         (Vectors.dense([9.0, 8.0]), 2.0), (Vectors.dense([8.0, 9.0]), 2.0)]</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(data, ["features", "weighCol"])</span> | 
|  | <span class="sd">    >>> kmeans = KMeans(k=2)</span> | 
|  | <span class="sd">    >>> kmeans.setSeed(1)</span> | 
|  | <span class="sd">    KMeans...</span> | 
|  | <span class="sd">    >>> kmeans.setWeightCol("weighCol")</span> | 
|  | <span class="sd">    KMeans...</span> | 
|  | <span class="sd">    >>> kmeans.setMaxIter(10)</span> | 
|  | <span class="sd">    KMeans...</span> | 
|  | <span class="sd">    >>> kmeans.getMaxIter()</span> | 
|  | <span class="sd">    10</span> | 
|  | <span class="sd">    >>> kmeans.clear(kmeans.maxIter)</span> | 
|  | <span class="sd">    >>> kmeans.getSolver()</span> | 
|  | <span class="sd">    'auto'</span> | 
|  | <span class="sd">    >>> model = kmeans.fit(df)</span> | 
|  | <span class="sd">    >>> model.getMaxBlockSizeInMB()</span> | 
|  | <span class="sd">    0.0</span> | 
|  | <span class="sd">    >>> model.getDistanceMeasure()</span> | 
|  | <span class="sd">    'euclidean'</span> | 
|  | <span class="sd">    >>> model.setPredictionCol("newPrediction")</span> | 
|  | <span class="sd">    KMeansModel...</span> | 
|  | <span class="sd">    >>> model.predict(df.head().features)</span> | 
|  | <span class="sd">    0</span> | 
|  | <span class="sd">    >>> centers = model.clusterCenters()</span> | 
|  | <span class="sd">    >>> len(centers)</span> | 
|  | <span class="sd">    2</span> | 
|  | <span class="sd">    >>> transformed = model.transform(df).select("features", "newPrediction")</span> | 
|  | <span class="sd">    >>> rows = transformed.collect()</span> | 
|  | <span class="sd">    >>> rows[0].newPrediction == rows[1].newPrediction</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> rows[2].newPrediction == rows[3].newPrediction</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> model.hasSummary</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> summary = model.summary</span> | 
|  | <span class="sd">    >>> summary.k</span> | 
|  | <span class="sd">    2</span> | 
|  | <span class="sd">    >>> summary.clusterSizes</span> | 
|  | <span class="sd">    [2, 2]</span> | 
|  | <span class="sd">    >>> summary.trainingCost</span> | 
|  | <span class="sd">    4.0</span> | 
|  | <span class="sd">    >>> kmeans_path = temp_path + "/kmeans"</span> | 
|  | <span class="sd">    >>> kmeans.save(kmeans_path)</span> | 
|  | <span class="sd">    >>> kmeans2 = KMeans.load(kmeans_path)</span> | 
|  | <span class="sd">    >>> kmeans2.getK()</span> | 
|  | <span class="sd">    2</span> | 
|  | <span class="sd">    >>> model_path = temp_path + "/kmeans_model"</span> | 
|  | <span class="sd">    >>> model.save(model_path)</span> | 
|  | <span class="sd">    >>> model2 = KMeansModel.load(model_path)</span> | 
|  | <span class="sd">    >>> model2.hasSummary</span> | 
|  | <span class="sd">    False</span> | 
|  | <span class="sd">    >>> model.clusterCenters()[0] == model2.clusterCenters()[0]</span> | 
|  | <span class="sd">    array([ True,  True], dtype=bool)</span> | 
|  | <span class="sd">    >>> model.clusterCenters()[1] == model2.clusterCenters()[1]</span> | 
|  | <span class="sd">    array([ True,  True], dtype=bool)</span> | 
|  | <span class="sd">    >>> model.transform(df).take(1) == model2.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> | 
|  | <span class="n">k</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">initMode</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"k-means||"</span><span class="p">,</span> | 
|  | <span class="n">initSteps</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">tol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1e-4</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">distanceMeasure</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"euclidean"</span><span class="p">,</span> | 
|  | <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">solver</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"auto"</span><span class="p">,</span> | 
|  | <span class="n">maxBlockSizeInMB</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, featuresCol="features", predictionCol="prediction", k=2, \</span> | 
|  | <span class="sd">                 initMode="k-means||", initSteps=2, tol=1e-4, maxIter=20, seed=None, \</span> | 
|  | <span class="sd">                 distanceMeasure="euclidean", weightCol=None, solver="auto", \</span> | 
|  | <span class="sd">                 maxBlockSizeInMB=0.0)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">KMeans</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.clustering.KMeans"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="n">KMeansModel</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">KMeansModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> | 
|  | <span class="n">k</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">initMode</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"k-means||"</span><span class="p">,</span> | 
|  | <span class="n">initSteps</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">tol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1e-4</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">distanceMeasure</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"euclidean"</span><span class="p">,</span> | 
|  | <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">solver</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"auto"</span><span class="p">,</span> | 
|  | <span class="n">maxBlockSizeInMB</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, featuresCol="features", predictionCol="prediction", k=2, \</span> | 
|  | <span class="sd">                  initMode="k-means||", initSteps=2, tol=1e-4, maxIter=20, seed=None, \</span> | 
|  | <span class="sd">                  distanceMeasure="euclidean", weightCol=None, solver="auto", \</span> | 
|  | <span class="sd">                  maxBlockSizeInMB=0.0)</span> | 
|  |  | 
|  | <span class="sd">        Sets params for KMeans.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans.setK"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans.setK">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setK</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`k`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">k</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans.setInitMode"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans.setInitMode">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInitMode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`initMode`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">initMode</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans.setInitSteps"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans.setInitSteps">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInitSteps</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`initSteps`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">initSteps</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans.setDistanceMeasure"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans.setDistanceMeasure">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setDistanceMeasure</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`distanceMeasure`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">distanceMeasure</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans.setMaxIter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans.setMaxIter">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMaxIter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`maxIter`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans.setFeaturesCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans.setPredictionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans.setPredictionCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setPredictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`predictionCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">predictionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans.setSeed"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans.setSeed">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSeed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`seed`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans.setTol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans.setTol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setTol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`tol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">tol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans.setWeightCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans.setWeightCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setWeightCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`weightCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">weightCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans.setSolver"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans.setSolver">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSolver</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`solver`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">solver</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="KMeans.setMaxBlockSizeInMB"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.KMeans.html#pyspark.ml.clustering.KMeans.setMaxBlockSizeInMB">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMaxBlockSizeInMB</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"KMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`maxBlockSizeInMB`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxBlockSizeInMB</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">_BisectingKMeansParams</span><span class="p">(</span> | 
|  | <span class="n">HasMaxIter</span><span class="p">,</span> | 
|  | <span class="n">HasFeaturesCol</span><span class="p">,</span> | 
|  | <span class="n">HasSeed</span><span class="p">,</span> | 
|  | <span class="n">HasPredictionCol</span><span class="p">,</span> | 
|  | <span class="n">HasDistanceMeasure</span><span class="p">,</span> | 
|  | <span class="n">HasWeightCol</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`BisectingKMeans` and :py:class:`BisectingKMeansModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">k</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"k"</span><span class="p">,</span> | 
|  | <span class="s2">"The desired number of leaf clusters. Must be > 1."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">minDivisibleClusterSize</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"minDivisibleClusterSize"</span><span class="p">,</span> | 
|  | <span class="s2">"The minimum number of points (if >= 1.0) or the minimum "</span> | 
|  | <span class="o">+</span> <span class="s2">"proportion of points (if &lt; 1.0) of a divisible cluster."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_BisectingKMeansParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">k</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">minDivisibleClusterSize</span><span class="o">=</span><span class="mf">1.0</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getK</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of `k` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">k</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getMinDivisibleClusterSize</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of `minDivisibleClusterSize` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">minDivisibleClusterSize</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeansModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeansModel.html#pyspark.ml.clustering.BisectingKMeansModel">[docs]</a><span class="k">class</span> <span class="nc">BisectingKMeansModel</span><span class="p">(</span> | 
|  | <span class="n">JavaModel</span><span class="p">,</span> | 
|  | <span class="n">_BisectingKMeansParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"BisectingKMeansModel"</span><span class="p">],</span> | 
|  | <span class="n">HasTrainingSummary</span><span class="p">[</span><span class="s2">"BisectingKMeansSummary"</span><span class="p">],</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by BisectingKMeans.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeansModel.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeansModel.html#pyspark.ml.clustering.BisectingKMeansModel.setFeaturesCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BisectingKMeansModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeansModel.setPredictionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeansModel.html#pyspark.ml.clustering.BisectingKMeansModel.setPredictionCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setPredictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BisectingKMeansModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`predictionCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">predictionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeansModel.clusterCenters"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeansModel.html#pyspark.ml.clustering.BisectingKMeansModel.clusterCenters">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">clusterCenters</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""Get the cluster centers, represented as a list of NumPy arrays."""</span> | 
|  | <span class="k">return</span> <span class="p">[</span><span class="n">c</span><span class="o">.</span><span class="n">toArray</span><span class="p">()</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"clusterCenters"</span><span class="p">)]</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeansModel.computeCost"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeansModel.html#pyspark.ml.clustering.BisectingKMeansModel.computeCost">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">computeCost</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Computes the sum of squared distances between the input points</span> | 
|  | <span class="sd">        and their corresponding cluster centers.</span> | 
|  |  | 
|  | <span class="sd">        .. deprecated:: 3.0.0</span> | 
|  | <span class="sd">            It will be removed in future versions. Use :py:class:`ClusteringEvaluator` instead.</span> | 
|  | <span class="sd">            You can also get the cost on the training dataset in the summary.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span> | 
|  | <span class="s2">"Deprecated in 3.0.0. It will be removed in future versions. Use "</span> | 
|  | <span class="s2">"ClusteringEvaluator instead. You can also get the cost on the training "</span> | 
|  | <span class="s2">"dataset in the summary."</span><span class="p">,</span> | 
|  | <span class="ne">FutureWarning</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"computeCost"</span><span class="p">,</span> <span class="n">dataset</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">summary</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BisectingKMeansSummary"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets summary (cluster assignments, cluster sizes) of the model trained on the</span> | 
|  | <span class="sd">        training set. An exception is thrown if no summary exists.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">hasSummary</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">BisectingKMeansSummary</span><span class="p">(</span><span class="nb">super</span><span class="p">(</span><span class="n">BisectingKMeansModel</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">summary</span><span class="p">)</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span> | 
|  | <span class="s2">"No training summary available for this </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeansModel.predict"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeansModel.html#pyspark.ml.clustering.BisectingKMeansModel.predict">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">Vector</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Predict label for the given features.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"predict"</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeans"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeans.html#pyspark.ml.clustering.BisectingKMeans">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">BisectingKMeans</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="n">BisectingKMeansModel</span><span class="p">],</span> | 
|  | <span class="n">_BisectingKMeansParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"BisectingKMeans"</span><span class="p">],</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    A bisecting k-means algorithm based on the paper "A comparison of document clustering</span> | 
|  | <span class="sd">    techniques" by Steinbach, Karypis, and Kumar, with modification to fit Spark.</span> | 
|  | <span class="sd">    The algorithm starts from a single cluster that contains all points.</span> | 
|  | <span class="sd">    Iteratively it finds divisible clusters on the bottom level and bisects each of them using</span> | 
|  | <span class="sd">    k-means, until there are `k` leaf clusters in total or no leaf clusters are divisible.</span> | 
|  | <span class="sd">    The bisecting steps of clusters on the same level are grouped together to increase parallelism.</span> | 
|  | <span class="sd">    If bisecting all divisible clusters on the bottom level would result in more than `k` leaf</span> | 
|  | <span class="sd">    clusters, larger clusters get higher priority.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.0.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> data = [(Vectors.dense([0.0, 0.0]), 2.0), (Vectors.dense([1.0, 1.0]), 2.0),</span> | 
|  | <span class="sd">    ...         (Vectors.dense([9.0, 8.0]), 2.0), (Vectors.dense([8.0, 9.0]), 2.0)]</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(data, ["features", "weighCol"])</span> | 
|  | <span class="sd">    >>> bkm = BisectingKMeans(k=2, minDivisibleClusterSize=1.0)</span> | 
|  | <span class="sd">    >>> bkm.setMaxIter(10)</span> | 
|  | <span class="sd">    BisectingKMeans...</span> | 
|  | <span class="sd">    >>> bkm.getMaxIter()</span> | 
|  | <span class="sd">    10</span> | 
|  | <span class="sd">    >>> bkm.clear(bkm.maxIter)</span> | 
|  | <span class="sd">    >>> bkm.setSeed(1)</span> | 
|  | <span class="sd">    BisectingKMeans...</span> | 
|  | <span class="sd">    >>> bkm.setWeightCol("weighCol")</span> | 
|  | <span class="sd">    BisectingKMeans...</span> | 
|  | <span class="sd">    >>> bkm.getSeed()</span> | 
|  | <span class="sd">    1</span> | 
|  | <span class="sd">    >>> bkm.clear(bkm.seed)</span> | 
|  | <span class="sd">    >>> model = bkm.fit(df)</span> | 
|  | <span class="sd">    >>> model.getMaxIter()</span> | 
|  | <span class="sd">    20</span> | 
|  | <span class="sd">    >>> model.setPredictionCol("newPrediction")</span> | 
|  | <span class="sd">    BisectingKMeansModel...</span> | 
|  | <span class="sd">    >>> model.predict(df.head().features)</span> | 
|  | <span class="sd">    0</span> | 
|  | <span class="sd">    >>> centers = model.clusterCenters()</span> | 
|  | <span class="sd">    >>> len(centers)</span> | 
|  | <span class="sd">    2</span> | 
|  | <span class="sd">    >>> model.computeCost(df)</span> | 
|  | <span class="sd">    2.0</span> | 
|  | <span class="sd">    >>> model.hasSummary</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> summary = model.summary</span> | 
|  | <span class="sd">    >>> summary.k</span> | 
|  | <span class="sd">    2</span> | 
|  | <span class="sd">    >>> summary.clusterSizes</span> | 
|  | <span class="sd">    [2, 2]</span> | 
|  | <span class="sd">    >>> summary.trainingCost</span> | 
|  | <span class="sd">    4.000...</span> | 
|  | <span class="sd">    >>> transformed = model.transform(df).select("features", "newPrediction")</span> | 
|  | <span class="sd">    >>> rows = transformed.collect()</span> | 
|  | <span class="sd">    >>> rows[0].newPrediction == rows[1].newPrediction</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> rows[2].newPrediction == rows[3].newPrediction</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> bkm_path = temp_path + "/bkm"</span> | 
|  | <span class="sd">    >>> bkm.save(bkm_path)</span> | 
|  | <span class="sd">    >>> bkm2 = BisectingKMeans.load(bkm_path)</span> | 
|  | <span class="sd">    >>> bkm2.getK()</span> | 
|  | <span class="sd">    2</span> | 
|  | <span class="sd">    >>> bkm2.getDistanceMeasure()</span> | 
|  | <span class="sd">    'euclidean'</span> | 
|  | <span class="sd">    >>> model_path = temp_path + "/bkm_model"</span> | 
|  | <span class="sd">    >>> model.save(model_path)</span> | 
|  | <span class="sd">    >>> model2 = BisectingKMeansModel.load(model_path)</span> | 
|  | <span class="sd">    >>> model2.hasSummary</span> | 
|  | <span class="sd">    False</span> | 
|  | <span class="sd">    >>> model.clusterCenters()[0] == model2.clusterCenters()[0]</span> | 
|  | <span class="sd">    array([ True,  True], dtype=bool)</span> | 
|  | <span class="sd">    >>> model.clusterCenters()[1] == model2.clusterCenters()[1]</span> | 
|  | <span class="sd">    array([ True,  True], dtype=bool)</span> | 
|  | <span class="sd">    >>> model.transform(df).take(1) == model2.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">k</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span> | 
|  | <span class="n">minDivisibleClusterSize</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> | 
|  | <span class="n">distanceMeasure</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"euclidean"</span><span class="p">,</span> | 
|  | <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, featuresCol="features", predictionCol="prediction", maxIter=20, \</span> | 
|  | <span class="sd">                 seed=None, k=4, minDivisibleClusterSize=1.0, distanceMeasure="euclidean", \</span> | 
|  | <span class="sd">                 weightCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">BisectingKMeans</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> | 
|  | <span class="s2">"org.apache.spark.ml.clustering.BisectingKMeans"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeans.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeans.html#pyspark.ml.clustering.BisectingKMeans.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">k</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span> | 
|  | <span class="n">minDivisibleClusterSize</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> | 
|  | <span class="n">distanceMeasure</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"euclidean"</span><span class="p">,</span> | 
|  | <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"BisectingKMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, featuresCol="features", predictionCol="prediction", maxIter=20, \</span> | 
|  | <span class="sd">                  seed=None, k=4, minDivisibleClusterSize=1.0, distanceMeasure="euclidean", \</span> | 
|  | <span class="sd">                  weightCol=None)</span> | 
|  | <span class="sd">        Sets params for BisectingKMeans.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeans.setK"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeans.html#pyspark.ml.clustering.BisectingKMeans.setK">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setK</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BisectingKMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`k`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">k</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeans.setMinDivisibleClusterSize"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeans.html#pyspark.ml.clustering.BisectingKMeans.setMinDivisibleClusterSize">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMinDivisibleClusterSize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BisectingKMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`minDivisibleClusterSize`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minDivisibleClusterSize</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeans.setDistanceMeasure"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeans.html#pyspark.ml.clustering.BisectingKMeans.setDistanceMeasure">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setDistanceMeasure</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BisectingKMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`distanceMeasure`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">distanceMeasure</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeans.setMaxIter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeans.html#pyspark.ml.clustering.BisectingKMeans.setMaxIter">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMaxIter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BisectingKMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`maxIter`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeans.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeans.html#pyspark.ml.clustering.BisectingKMeans.setFeaturesCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BisectingKMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeans.setPredictionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeans.html#pyspark.ml.clustering.BisectingKMeans.setPredictionCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setPredictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BisectingKMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`predictionCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">predictionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeans.setSeed"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeans.html#pyspark.ml.clustering.BisectingKMeans.setSeed">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSeed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BisectingKMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`seed`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeans.setWeightCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeans.html#pyspark.ml.clustering.BisectingKMeans.setWeightCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setWeightCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BisectingKMeans"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`weightCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">weightCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="n">BisectingKMeansModel</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">BisectingKMeansModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="BisectingKMeansSummary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.BisectingKMeansSummary.html#pyspark.ml.clustering.BisectingKMeansSummary">[docs]</a><span class="k">class</span> <span class="nc">BisectingKMeansSummary</span><span class="p">(</span><span class="n">ClusteringSummary</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Bisecting KMeans clustering results for a given model.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.1.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">trainingCost</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sum of squared distances to the nearest centroid for all points in the training dataset.</span> | 
|  | <span class="sd">        This is equivalent to sklearn's inertia.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"trainingCost"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">_LDAParams</span><span class="p">(</span><span class="n">HasMaxIter</span><span class="p">,</span> <span class="n">HasFeaturesCol</span><span class="p">,</span> <span class="n">HasSeed</span><span class="p">,</span> <span class="n">HasCheckpointInterval</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`LDA` and :py:class:`LDAModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">k</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"k"</span><span class="p">,</span> | 
|  | <span class="s2">"The number of topics (clusters) to infer. Must be > 1."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">optimizer</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"optimizer"</span><span class="p">,</span> | 
|  | <span class="s2">"Optimizer or inference algorithm used to estimate the LDA model.  "</span> | 
|  | <span class="s2">"Supported: online, em"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">learningOffset</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"learningOffset"</span><span class="p">,</span> | 
|  | <span class="s2">"A (positive) learning parameter that downweights early iterations."</span> | 
|  | <span class="s2">" Larger values make early iterations count less"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">learningDecay</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"learningDecay"</span><span class="p">,</span> | 
|  | <span class="s2">"Learning rate, set as an "</span> | 
|  | <span class="s2">"exponential decay rate. This should be between (0.5, 1.0] to "</span> | 
|  | <span class="s2">"guarantee asymptotic convergence."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">subsamplingRate</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"subsamplingRate"</span><span class="p">,</span> | 
|  | <span class="s2">"Fraction of the corpus to be sampled and used in each iteration "</span> | 
|  | <span class="s2">"of mini-batch gradient descent, in range (0, 1]."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">optimizeDocConcentration</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"optimizeDocConcentration"</span><span class="p">,</span> | 
|  | <span class="s2">"Indicates whether the docConcentration (Dirichlet parameter "</span> | 
|  | <span class="s2">"for document-topic distribution) will be optimized during "</span> | 
|  | <span class="s2">"training."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">docConcentration</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"docConcentration"</span><span class="p">,</span> | 
|  | <span class="s1">'Concentration parameter (commonly named "alpha") for the '</span> | 
|  | <span class="s1">'prior placed on documents</span><span class="se">\'</span><span class="s1"> distributions over topics ("theta").'</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toListFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">topicConcentration</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"topicConcentration"</span><span class="p">,</span> | 
|  | <span class="s1">'Concentration parameter (commonly named "beta" or "eta") for '</span> | 
|  | <span class="s2">"the prior placed on topics' distributions over terms."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">topicDistributionCol</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"topicDistributionCol"</span><span class="p">,</span> | 
|  | <span class="s2">"Output column with estimates of the topic mixture distribution "</span> | 
|  | <span class="s1">'for each document (often called "theta" in the literature). '</span> | 
|  | <span class="s2">"Returns a vector of zeros for an empty document."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">keepLastCheckpoint</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"keepLastCheckpoint"</span><span class="p">,</span> | 
|  | <span class="s2">"(For EM optimizer) If using checkpointing, this indicates whether"</span> | 
|  | <span class="s2">" to keep the last checkpoint. If false, then the checkpoint will be"</span> | 
|  | <span class="s2">" deleted. Deleting the checkpoint can cause failures if a data"</span> | 
|  | <span class="s2">" partition is lost, so set this bit with care."</span><span class="p">,</span> | 
|  | <span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_LDAParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> | 
|  | <span class="n">maxIter</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> | 
|  | <span class="n">checkpointInterval</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> | 
|  | <span class="n">k</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> | 
|  | <span class="n">optimizer</span><span class="o">=</span><span class="s2">"online"</span><span class="p">,</span> | 
|  | <span class="n">learningOffset</span><span class="o">=</span><span class="mf">1024.0</span><span class="p">,</span> | 
|  | <span class="n">learningDecay</span><span class="o">=</span><span class="mf">0.51</span><span class="p">,</span> | 
|  | <span class="n">subsamplingRate</span><span class="o">=</span><span class="mf">0.05</span><span class="p">,</span> | 
|  | <span class="n">optimizeDocConcentration</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">topicDistributionCol</span><span class="o">=</span><span class="s2">"topicDistribution"</span><span class="p">,</span> | 
|  | <span class="n">keepLastCheckpoint</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getK</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`k` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">k</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getOptimizer</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`optimizer` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">optimizer</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getLearningOffset</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`learningOffset` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">learningOffset</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getLearningDecay</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`learningDecay` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">learningDecay</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getSubsamplingRate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`subsamplingRate` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">subsamplingRate</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getOptimizeDocConcentration</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`optimizeDocConcentration` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">optimizeDocConcentration</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getDocConcentration</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`docConcentration` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">docConcentration</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getTopicConcentration</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`topicConcentration` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">topicConcentration</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getTopicDistributionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`topicDistributionCol` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">topicDistributionCol</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getKeepLastCheckpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`keepLastCheckpoint` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">keepLastCheckpoint</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="LDAModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDAModel.html#pyspark.ml.clustering.LDAModel">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">LDAModel</span><span class="p">(</span><span class="n">JavaModel</span><span class="p">,</span> <span class="n">_LDAParams</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Latent Dirichlet Allocation (LDA) model.</span> | 
|  | <span class="sd">    This abstraction permits different underlying representations,</span> | 
|  | <span class="sd">    including local and distributed data structures.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="LDAModel.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDAModel.html#pyspark.ml.clustering.LDAModel.setFeaturesCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="s2">"M"</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"M"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDAModel.setSeed"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDAModel.html#pyspark.ml.clustering.LDAModel.setSeed">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSeed</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="s2">"M"</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"M"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`seed`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDAModel.setTopicDistributionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDAModel.html#pyspark.ml.clustering.LDAModel.setTopicDistributionCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setTopicDistributionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="s2">"M"</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"M"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`topicDistributionCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">topicDistributionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDAModel.isDistributed"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDAModel.html#pyspark.ml.clustering.LDAModel.isDistributed">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">isDistributed</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Indicates whether this instance is of type DistributedLDAModel</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"isDistributed"</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDAModel.vocabSize"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDAModel.html#pyspark.ml.clustering.LDAModel.vocabSize">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">vocabSize</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""Vocabulary size (number of terms or words in the vocabulary)"""</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"vocabSize"</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Source listing for LDAModel.topicsMatrix. The [docs] anchor links back to this method's entry on the LDAModel API reference page; the quoted docstring includes a warning about collecting data to the driver. --><div class="viewcode-block" id="LDAModel.topicsMatrix"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDAModel.html#pyspark.ml.clustering.LDAModel.topicsMatrix">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">topicsMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Matrix</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Inferred topics, where each topic is represented by a distribution over terms.</span> | 
|  | <span class="sd">        This is a matrix of size vocabSize x k, where each column is a topic.</span> | 
|  | <span class="sd">        No guarantees are given about the ordering of the topics.</span> | 
|  |  | 
|  | <span class="sd">        .. warning:: If this model is actually a :py:class:`DistributedLDAModel`</span> | 
|  | <span class="sd">            instance produced by the Expectation-Maximization ("em") `optimizer`,</span> | 
|  | <span class="sd">            then this method could involve collecting a large amount of data</span> | 
|  | <span class="sd">            to the driver (on the order of vocabSize x k).</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"topicsMatrix"</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Source listing for LDAModel.logLikelihood, which takes a dataset argument. The [docs] anchor links back to this method's entry on the LDAModel API reference page. --><div class="viewcode-block" id="LDAModel.logLikelihood"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDAModel.html#pyspark.ml.clustering.LDAModel.logLikelihood">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">logLikelihood</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Calculates a lower bound on the log likelihood of the entire corpus.</span> | 
|  | <span class="sd">        See Equation (16) in the Online LDA paper (Hoffman et al., 2010).</span> | 
|  |  | 
|  | <span class="sd">        .. warning:: If this model is an instance of :py:class:`DistributedLDAModel` (produced when</span> | 
|  | <span class="sd">            :py:attr:`optimizer` is set to "em"), this involves collecting a large</span> | 
|  | <span class="sd">            :py:func:`topicsMatrix` to the driver. This implementation may be changed in the future.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"logLikelihood"</span><span class="p">,</span> <span class="n">dataset</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Source listing for LDAModel.logPerplexity, which takes a dataset argument. The [docs] anchor links back to this method's entry on the LDAModel API reference page. --><div class="viewcode-block" id="LDAModel.logPerplexity"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDAModel.html#pyspark.ml.clustering.LDAModel.logPerplexity">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">logPerplexity</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Calculate an upper bound on perplexity.  (Lower is better.)</span> | 
|  | <span class="sd">        See Equation (16) in the Online LDA paper (Hoffman et al., 2010).</span> | 
|  |  | 
|  | <span class="sd">        .. warning:: If this model is an instance of :py:class:`DistributedLDAModel` (produced when</span> | 
|  | <span class="sd">            :py:attr:`optimizer` is set to "em"), this involves collecting a large</span> | 
|  | <span class="sd">            :py:func:`topicsMatrix` to the driver. This implementation may be changed in the future.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"logPerplexity"</span><span class="p">,</span> <span class="n">dataset</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Source listing for LDAModel.describeTopics; the highlighted signature shows maxTermsPerTopic defaulting to 10. The [docs] anchor links back to this method's entry on the LDAModel API reference page. --><div class="viewcode-block" id="LDAModel.describeTopics"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDAModel.html#pyspark.ml.clustering.LDAModel.describeTopics">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">describeTopics</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">maxTermsPerTopic</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Return the topics described by their top-weighted terms.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"describeTopics"</span><span class="p">,</span> <span class="n">maxTermsPerTopic</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Source listing for LDAModel.estimatedDocConcentration. The [docs] anchor links back to this method's entry on the LDAModel API reference page. The last line of this listing carries two closing div tags: the first ends this method block, the second ends an enclosing block opened earlier in the page (presumably the LDAModel class listing; confirm against the full file). --><div class="viewcode-block" id="LDAModel.estimatedDocConcentration"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDAModel.html#pyspark.ml.clustering.LDAModel.estimatedDocConcentration">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">estimatedDocConcentration</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Value for :py:attr:`LDA.docConcentration` estimated from data.</span> | 
|  | <span class="sd">        If Online LDA was used and :py:attr:`LDA.optimizeDocConcentration` was set to false,</span> | 
|  | <span class="sd">        then this returns the fixed (given) value for the :py:attr:`LDA.docConcentration` parameter.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"estimatedDocConcentration"</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <!-- Source listing for class DistributedLDAModel, declared with LDAModel as its first base. The [docs] anchor links back to the class API reference page. Four nested viewcode blocks follow inside this one: toLocal, trainingLogLikelihood, logPrior and getCheckpointFiles. --><div class="viewcode-block" id="DistributedLDAModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.DistributedLDAModel.html#pyspark.ml.clustering.DistributedLDAModel">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">DistributedLDAModel</span><span class="p">(</span><span class="n">LDAModel</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"DistributedLDAModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Distributed model fitted by :py:class:`LDA`.</span> | 
|  | <span class="sd">    This type of model is currently only produced by Expectation-Maximization (EM).</span> | 
|  |  | 
|  | <span class="sd">    This model stores the inferred topics, the full training dataset, and the topic distribution</span> | 
|  | <span class="sd">    for each training document.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <!-- Nested listing for DistributedLDAModel.toLocal; the body wraps the Java call result in LocalLDAModel and then runs two params helpers flagged as a temporary fix for SPARK-10931. --><div class="viewcode-block" id="DistributedLDAModel.toLocal"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.DistributedLDAModel.html#pyspark.ml.clustering.DistributedLDAModel.toLocal">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">toLocal</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LocalLDAModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Convert this distributed model to a local representation.  This discards info about the</span> | 
|  | <span class="sd">        training dataset.</span> | 
|  |  | 
|  | <span class="sd">        .. warning:: This involves collecting a large :py:func:`topicsMatrix` to the driver.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">model</span> <span class="o">=</span> <span class="n">LocalLDAModel</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"toLocal"</span><span class="p">))</span> | 
|  |  | 
|  | <span class="c1"># SPARK-10931: Temporary fix to be removed once LDAModel defines Params</span> | 
|  | <span class="n">model</span><span class="o">.</span><span class="n">_create_params_from_java</span><span class="p">()</span> | 
|  | <span class="n">model</span><span class="o">.</span><span class="n">_transfer_params_from_java</span><span class="p">()</span> | 
|  |  | 
|  | <span class="k">return</span> <span class="n">model</span></div> | 
|  |  | 
|  | <!-- Nested listing for DistributedLDAModel.trainingLogLikelihood; the quoted docstring has a Notes section contrasting it with logPrior and logLikelihood. --><div class="viewcode-block" id="DistributedLDAModel.trainingLogLikelihood"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.DistributedLDAModel.html#pyspark.ml.clustering.DistributedLDAModel.trainingLogLikelihood">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">trainingLogLikelihood</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Log likelihood of the observed tokens in the training set,</span> | 
|  | <span class="sd">        given the current parameter estimates:</span> | 
|  | <span class="sd">        log P(docs | topics, topic distributions for docs, Dirichlet hyperparameters)</span> | 
|  |  | 
|  | <span class="sd">        Notes</span> | 
|  | <span class="sd">        -----</span> | 
|  | <span class="sd">        - This excludes the prior; for that, use :py:func:`logPrior`.</span> | 
|  | <span class="sd">        - Even with :py:func:`logPrior`, this is NOT the same as the data log likelihood given</span> | 
|  | <span class="sd">            the hyperparameters.</span> | 
|  | <span class="sd">        - This is computed from the topic distributions computed during training. If you call</span> | 
|  | <span class="sd">            :py:func:`logLikelihood` on the same training dataset, the topic distributions</span> | 
|  | <span class="sd">            will be computed again, possibly giving different results.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"trainingLogLikelihood"</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Nested listing for DistributedLDAModel.logPrior. --><div class="viewcode-block" id="DistributedLDAModel.logPrior"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.DistributedLDAModel.html#pyspark.ml.clustering.DistributedLDAModel.logPrior">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">logPrior</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Log probability of the current parameter estimate:</span> | 
|  | <span class="sd">        log P(topics, topic distributions for docs | alpha, eta)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"logPrior"</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Nested listing for DistributedLDAModel.getCheckpointFiles. Unlike its siblings in this class it shows no @since decorator; the version is given by a versionadded directive in the docstring. The last line closes both this method block and the enclosing class block. --><div class="viewcode-block" id="DistributedLDAModel.getCheckpointFiles"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.DistributedLDAModel.html#pyspark.ml.clustering.DistributedLDAModel.getCheckpointFiles">[docs]</a>    <span class="k">def</span> <span class="nf">getCheckpointFiles</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        If using checkpointing and :py:attr:`LDA.keepLastCheckpoint` is set to true, then there may</span> | 
|  | <span class="sd">        be saved checkpoint files.  This method is provided so that users can manage those files.</span> | 
|  |  | 
|  | <span class="sd">        .. versionadded:: 2.0.0</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        list</span> | 
|  | <span class="sd">            List of checkpoint files from training</span> | 
|  |  | 
|  | <span class="sd">        Notes</span> | 
|  | <span class="sd">        -----</span> | 
|  | <span class="sd">        Removing the checkpoints can cause failures if a partition is lost and is needed</span> | 
|  | <span class="sd">        by certain :py:class:`DistributedLDAModel` methods.  Reference counting will clean up</span> | 
|  | <span class="sd">        the checkpoints when this model and derivative data go out of scope.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"getCheckpointFiles"</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <!-- Source listing for class LocalLDAModel, declared with LDAModel as its first base. The [docs] anchor links back to the class API reference page. The class body shown is only a docstring and a pass statement, so this block has no nested method listings. --><div class="viewcode-block" id="LocalLDAModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LocalLDAModel.html#pyspark.ml.clustering.LocalLDAModel">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">LocalLDAModel</span><span class="p">(</span><span class="n">LDAModel</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"LocalLDAModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Local (non-distributed) model fitted by :py:class:`LDA`.</span> | 
|  | <span class="sd">    This model stores the inferred topics only; it does not store info about the training dataset.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="k">pass</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="LDA"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">LDA</span><span class="p">(</span><span class="n">JavaEstimator</span><span class="p">[</span><span class="n">LDAModel</span><span class="p">],</span> <span class="n">_LDAParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"LDA"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Latent Dirichlet Allocation (LDA), a topic model designed for text documents.</span> | 
|  |  | 
|  | <span class="sd">    Terminology:</span> | 
|  |  | 
|  | <span class="sd">     - "term" = "word": an element of the vocabulary</span> | 
|  | <span class="sd">     - "token": instance of a term appearing in a document</span> | 
|  | <span class="sd">     - "topic": multinomial distribution over terms representing some concept</span> | 
|  | <span class="sd">     - "document": one piece of text, corresponding to one row in the input data</span> | 
|  |  | 
|  | <span class="sd">    Original LDA paper (journal version):</span> | 
|  | <span class="sd">      Blei, Ng, and Jordan.  "Latent Dirichlet Allocation."  JMLR, 2003.</span> | 
|  |  | 
|  | <span class="sd">    Input data (featuresCol):</span> | 
|  | <span class="sd">    LDA is given a collection of documents as input data, via the featuresCol parameter.</span> | 
|  | <span class="sd">    Each document is specified as a :py:class:`Vector` of length vocabSize, where each entry is the</span> | 
|  | <span class="sd">    count for the corresponding term (word) in the document.  Feature transformers such as</span> | 
|  | <span class="sd">    :py:class:`pyspark.ml.feature.Tokenizer` and :py:class:`pyspark.ml.feature.CountVectorizer`</span> | 
|  | <span class="sd">    can be useful for converting text to word count vectors.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.0.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors, SparseVector</span> | 
|  | <span class="sd">    >>> from pyspark.ml.clustering import LDA</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([[1, Vectors.dense([0.0, 1.0])],</span> | 
|  | <span class="sd">    ...      [2, SparseVector(2, {0: 1.0})],], ["id", "features"])</span> | 
|  | <span class="sd">    >>> lda = LDA(k=2, seed=1, optimizer="em")</span> | 
|  | <span class="sd">    >>> lda.setMaxIter(10)</span> | 
|  | <span class="sd">    LDA...</span> | 
|  | <span class="sd">    >>> lda.getMaxIter()</span> | 
|  | <span class="sd">    10</span> | 
|  | <span class="sd">    >>> lda.clear(lda.maxIter)</span> | 
|  | <span class="sd">    >>> model = lda.fit(df)</span> | 
|  | <span class="sd">    >>> model.setSeed(1)</span> | 
|  | <span class="sd">    DistributedLDAModel...</span> | 
|  | <span class="sd">    >>> model.getTopicDistributionCol()</span> | 
|  | <span class="sd">    'topicDistribution'</span> | 
|  | <span class="sd">    >>> model.isDistributed()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> localModel = model.toLocal()</span> | 
|  | <span class="sd">    >>> localModel.isDistributed()</span> | 
|  | <span class="sd">    False</span> | 
|  | <span class="sd">    >>> model.vocabSize()</span> | 
|  | <span class="sd">    2</span> | 
|  | <span class="sd">    >>> model.describeTopics().show()</span> | 
|  | <span class="sd">    +-----+-----------+--------------------+</span> | 
|  | <span class="sd">    |topic|termIndices|         termWeights|</span> | 
|  | <span class="sd">    +-----+-----------+--------------------+</span> | 
|  | <span class="sd">    |    0|     [1, 0]|[0.50401530077160...|</span> | 
|  | <span class="sd">    |    1|     [0, 1]|[0.50401530077160...|</span> | 
|  | <span class="sd">    +-----+-----------+--------------------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> model.topicsMatrix()</span> | 
|  | <span class="sd">    DenseMatrix(2, 2, [0.496, 0.504, 0.504, 0.496], 0)</span> | 
|  | <span class="sd">    >>> lda_path = temp_path + "/lda"</span> | 
|  | <span class="sd">    >>> lda.save(lda_path)</span> | 
|  | <span class="sd">    >>> sameLDA = LDA.load(lda_path)</span> | 
|  | <span class="sd">    >>> distributed_model_path = temp_path + "/lda_distributed_model"</span> | 
|  | <span class="sd">    >>> model.save(distributed_model_path)</span> | 
|  | <span class="sd">    >>> sameModel = DistributedLDAModel.load(distributed_model_path)</span> | 
|  | <span class="sd">    >>> local_model_path = temp_path + "/lda_local_model"</span> | 
|  | <span class="sd">    >>> localModel.save(local_model_path)</span> | 
|  | <span class="sd">    >>> sameLocalModel = LocalLDAModel.load(local_model_path)</span> | 
|  | <span class="sd">    >>> model.transform(df).take(1) == sameLocalModel.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">checkpointInterval</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> | 
|  | <span class="n">k</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> | 
|  | <span class="n">optimizer</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"online"</span><span class="p">,</span> | 
|  | <span class="n">learningOffset</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1024.0</span><span class="p">,</span> | 
|  | <span class="n">learningDecay</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.51</span><span class="p">,</span> | 
|  | <span class="n">subsamplingRate</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.05</span><span class="p">,</span> | 
|  | <span class="n">optimizeDocConcentration</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">docConcentration</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">topicConcentration</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">topicDistributionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"topicDistribution"</span><span class="p">,</span> | 
|  | <span class="n">keepLastCheckpoint</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, featuresCol="features", maxIter=20, seed=None, checkpointInterval=10,\</span> | 
|  | <span class="sd">                  k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51,\</span> | 
|  | <span class="sd">                  subsamplingRate=0.05, optimizeDocConcentration=True,\</span> | 
|  | <span class="sd">                  docConcentration=None, topicConcentration=None,\</span> | 
|  | <span class="sd">                  topicDistributionCol="topicDistribution", keepLastCheckpoint=True)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">LDA</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.clustering.LDA"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="n">LDAModel</span><span class="p">:</span> | 
|  | <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOptimizer</span><span class="p">()</span> <span class="o">==</span> <span class="s2">"em"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">DistributedLDAModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">LocalLDAModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> | 
|  |  | 
|  | <!-- Source listing for LDA.setParams, nested inside the LDA class listing. The [docs] anchor links back to this method's entry on the LDA API reference page. The highlighted signature is keyword-only (bare * after self) and lists the same parameters and defaults as the __init__ signature shown earlier in the LDA listing. --><div class="viewcode-block" id="LDA.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">checkpointInterval</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> | 
|  | <span class="n">k</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> | 
|  | <span class="n">optimizer</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"online"</span><span class="p">,</span> | 
|  | <span class="n">learningOffset</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1024.0</span><span class="p">,</span> | 
|  | <span class="n">learningDecay</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.51</span><span class="p">,</span> | 
|  | <span class="n">subsamplingRate</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.05</span><span class="p">,</span> | 
|  | <span class="n">optimizeDocConcentration</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">docConcentration</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">topicConcentration</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">topicDistributionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"topicDistribution"</span><span class="p">,</span> | 
|  | <span class="n">keepLastCheckpoint</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, featuresCol="features", maxIter=20, seed=None, checkpointInterval=10,\</span> | 
|  | <span class="sd">                  k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51,\</span> | 
|  | <span class="sd">                  subsamplingRate=0.05, optimizeDocConcentration=True,\</span> | 
|  | <span class="sd">                  docConcentration=None, topicConcentration=None,\</span> | 
|  | <span class="sd">                  topicDistributionCol="topicDistribution", keepLastCheckpoint=True)</span> | 
|  |  | 
|  | <span class="sd">        Sets params for LDA.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Source listing for LDA.setCheckpointInterval, nested inside the LDA class listing; the body forwards value to _set as checkpointInterval. The [docs] anchor links back to this method's entry on the LDA API reference page. --><div class="viewcode-block" id="LDA.setCheckpointInterval"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setCheckpointInterval">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setCheckpointInterval</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`checkpointInterval`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">checkpointInterval</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDA.setSeed"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setSeed">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSeed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`seed`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDA.setK"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setK">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setK</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`k`.</span> | 
|  |  | 
|  | <span class="sd">        >>> algo = LDA().setK(10)</span> | 
|  | <span class="sd">        >>> algo.getK()</span> | 
|  | <span class="sd">        10</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">k</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDA.setOptimizer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setOptimizer">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOptimizer</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`optimizer`.</span> | 
|  | <span class="sd">        Currently only support 'em' and 'online'.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> algo = LDA().setOptimizer("em")</span> | 
|  | <span class="sd">        >>> algo.getOptimizer()</span> | 
|  | <span class="sd">        'em'</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">optimizer</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDA.setLearningOffset"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setLearningOffset">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setLearningOffset</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`learningOffset`.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> algo = LDA().setLearningOffset(100)</span> | 
|  | <span class="sd">        >>> algo.getLearningOffset()</span> | 
|  | <span class="sd">        100.0</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">learningOffset</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDA.setLearningDecay"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setLearningDecay">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setLearningDecay</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`learningDecay`.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> algo = LDA().setLearningDecay(0.1)</span> | 
|  | <span class="sd">        >>> algo.getLearningDecay()</span> | 
|  | <span class="sd">        0.1...</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">learningDecay</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDA.setSubsamplingRate"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setSubsamplingRate">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSubsamplingRate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`subsamplingRate`.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> algo = LDA().setSubsamplingRate(0.1)</span> | 
|  | <span class="sd">        >>> algo.getSubsamplingRate()</span> | 
|  | <span class="sd">        0.1...</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">subsamplingRate</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDA.setOptimizeDocConcentration"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setOptimizeDocConcentration">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOptimizeDocConcentration</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`optimizeDocConcentration`.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> algo = LDA().setOptimizeDocConcentration(True)</span> | 
|  | <span class="sd">        >>> algo.getOptimizeDocConcentration()</span> | 
|  | <span class="sd">        True</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">optimizeDocConcentration</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDA.setDocConcentration"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setDocConcentration">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setDocConcentration</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`docConcentration`.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> algo = LDA().setDocConcentration([0.1, 0.2])</span> | 
|  | <span class="sd">        >>> algo.getDocConcentration()</span> | 
|  | <span class="sd">        [0.1..., 0.2...]</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">docConcentration</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDA.setTopicConcentration"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setTopicConcentration">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setTopicConcentration</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`topicConcentration`.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> algo = LDA().setTopicConcentration(0.5)</span> | 
|  | <span class="sd">        >>> algo.getTopicConcentration()</span> | 
|  | <span class="sd">        0.5...</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">topicConcentration</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDA.setTopicDistributionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setTopicDistributionCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setTopicDistributionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`topicDistributionCol`.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> algo = LDA().setTopicDistributionCol("topicDistributionCol")</span> | 
|  | <span class="sd">        >>> algo.getTopicDistributionCol()</span> | 
|  | <span class="sd">        'topicDistributionCol'</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">topicDistributionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDA.setKeepLastCheckpoint"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setKeepLastCheckpoint">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setKeepLastCheckpoint</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`keepLastCheckpoint`.</span> | 
|  |  | 
|  | <span class="sd">        Examples</span> | 
|  | <span class="sd">        --------</span> | 
|  | <span class="sd">        >>> algo = LDA().setKeepLastCheckpoint(False)</span> | 
|  | <span class="sd">        >>> algo.getKeepLastCheckpoint()</span> | 
|  | <span class="sd">        False</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">keepLastCheckpoint</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDA.setMaxIter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setMaxIter">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMaxIter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`maxIter`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="LDA.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.LDA.html#pyspark.ml.clustering.LDA.setFeaturesCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LDA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">_PowerIterationClusteringParams</span><span class="p">(</span><span class="n">HasMaxIter</span><span class="p">,</span> <span class="n">HasWeightCol</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`PowerIterationClustering`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">k</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"k"</span><span class="p">,</span> | 
|  | <span class="s2">"The number of clusters to create. Must be > 1."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">initMode</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"initMode"</span><span class="p">,</span> | 
|  | <span class="s2">"The initialization algorithm. This can be either "</span> | 
|  | <span class="o">+</span> <span class="s2">"'random' to use a random vector as vertex properties, or 'degree' to use "</span> | 
|  | <span class="o">+</span> <span class="s2">"a normalized sum of similarities with other vertices.  Supported options: "</span> | 
|  | <span class="o">+</span> <span class="s2">"'random' and 'degree'."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">srcCol</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"srcCol"</span><span class="p">,</span> | 
|  | <span class="s2">"Name of the input column for source vertex IDs."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">dstCol</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"dstCol"</span><span class="p">,</span> | 
|  | <span class="s2">"Name of the input column for destination vertex IDs."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_PowerIterationClusteringParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">k</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">maxIter</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">initMode</span><span class="o">=</span><span class="s2">"random"</span><span class="p">,</span> <span class="n">srcCol</span><span class="o">=</span><span class="s2">"src"</span><span class="p">,</span> <span class="n">dstCol</span><span class="o">=</span><span class="s2">"dst"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getK</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`k` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">k</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getInitMode</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`initMode` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">initMode</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getSrcCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`srcCol` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">srcCol</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getDstCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`dstCol` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dstCol</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="PowerIterationClustering"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.PowerIterationClustering.html#pyspark.ml.clustering.PowerIterationClustering">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">PowerIterationClustering</span><span class="p">(</span> | 
|  | <span class="n">_PowerIterationClusteringParams</span><span class="p">,</span> | 
|  | <span class="n">JavaParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"PowerIterationClustering"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Power Iteration Clustering (PIC), a scalable graph clustering algorithm developed by</span> | 
|  | <span class="sd">    `Lin and Cohen &lt;http://www.cs.cmu.edu/~frank/papers/icml2010-pic-final.pdf&gt;`_. From the</span> | 
|  | <span class="sd">    abstract: PIC finds a very low-dimensional embedding of a dataset using truncated power</span> | 
|  | <span class="sd">    iteration on a normalized pair-wise similarity matrix of the data.</span> | 
|  |  | 
|  | <span class="sd">    This class is not yet an Estimator/Transformer, use :py:func:`assignClusters` method</span> | 
|  | <span class="sd">    to run the PowerIterationClustering algorithm.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.4.0</span> | 
|  |  | 
|  | <span class="sd">    Notes</span> | 
|  | <span class="sd">    -----</span> | 
|  | <span class="sd">    See `Wikipedia on Spectral clustering &lt;http://en.wikipedia.org/wiki/Spectral_clustering&gt;`_</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> data = [(1, 0, 0.5),</span> | 
|  | <span class="sd">    ...         (2, 0, 0.5), (2, 1, 0.7),</span> | 
|  | <span class="sd">    ...         (3, 0, 0.5), (3, 1, 0.7), (3, 2, 0.9),</span> | 
|  | <span class="sd">    ...         (4, 0, 0.5), (4, 1, 0.7), (4, 2, 0.9), (4, 3, 1.1),</span> | 
|  | <span class="sd">    ...         (5, 0, 0.5), (5, 1, 0.7), (5, 2, 0.9), (5, 3, 1.1), (5, 4, 1.3)]</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(data).toDF("src", "dst", "weight").repartition(1)</span> | 
|  | <span class="sd">    >>> pic = PowerIterationClustering(k=2, weightCol="weight")</span> | 
|  | <span class="sd">    >>> pic.setMaxIter(40)</span> | 
|  | <span class="sd">    PowerIterationClustering...</span> | 
|  | <span class="sd">    >>> assignments = pic.assignClusters(df)</span> | 
|  | <span class="sd">    >>> assignments.sort(assignments.id).show(truncate=False)</span> | 
|  | <span class="sd">    +---+-------+</span> | 
|  | <span class="sd">    |id |cluster|</span> | 
|  | <span class="sd">    +---+-------+</span> | 
|  | <span class="sd">    |0  |0      |</span> | 
|  | <span class="sd">    |1  |0      |</span> | 
|  | <span class="sd">    |2  |0      |</span> | 
|  | <span class="sd">    |3  |0      |</span> | 
|  | <span class="sd">    |4  |0      |</span> | 
|  | <span class="sd">    |5  |1      |</span> | 
|  | <span class="sd">    +---+-------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> pic_path = temp_path + "/pic"</span> | 
|  | <span class="sd">    >>> pic.save(pic_path)</span> | 
|  | <span class="sd">    >>> pic2 = PowerIterationClustering.load(pic_path)</span> | 
|  | <span class="sd">    >>> pic2.getK()</span> | 
|  | <span class="sd">    2</span> | 
|  | <span class="sd">    >>> pic2.getMaxIter()</span> | 
|  | <span class="sd">    40</span> | 
|  | <span class="sd">    >>> pic2.assignClusters(df).take(6) == assignments.take(6)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">k</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> | 
|  | <span class="n">initMode</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"random"</span><span class="p">,</span> | 
|  | <span class="n">srcCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"src"</span><span class="p">,</span> | 
|  | <span class="n">dstCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"dst"</span><span class="p">,</span> | 
|  | <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst",\</span> | 
|  | <span class="sd">                 weightCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">PowerIterationClustering</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> | 
|  | <span class="s2">"org.apache.spark.ml.clustering.PowerIterationClustering"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="PowerIterationClustering.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.PowerIterationClustering.html#pyspark.ml.clustering.PowerIterationClustering.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">k</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> | 
|  | <span class="n">initMode</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"random"</span><span class="p">,</span> | 
|  | <span class="n">srcCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"src"</span><span class="p">,</span> | 
|  | <span class="n">dstCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"dst"</span><span class="p">,</span> | 
|  | <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"PowerIterationClustering"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst",\</span> | 
|  | <span class="sd">                  weightCol=None)</span> | 
|  | <span class="sd">        Sets params for PowerIterationClustering.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PowerIterationClustering.setK"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.PowerIterationClustering.html#pyspark.ml.clustering.PowerIterationClustering.setK">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setK</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PowerIterationClustering"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`k`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">k</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PowerIterationClustering.setInitMode"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.PowerIterationClustering.html#pyspark.ml.clustering.PowerIterationClustering.setInitMode">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInitMode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PowerIterationClustering"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`initMode`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">initMode</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PowerIterationClustering.setSrcCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.PowerIterationClustering.html#pyspark.ml.clustering.PowerIterationClustering.setSrcCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSrcCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PowerIterationClustering"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`srcCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">srcCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PowerIterationClustering.setDstCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.PowerIterationClustering.html#pyspark.ml.clustering.PowerIterationClustering.setDstCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setDstCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PowerIterationClustering"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`dstCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">dstCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PowerIterationClustering.setMaxIter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.PowerIterationClustering.html#pyspark.ml.clustering.PowerIterationClustering.setMaxIter">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMaxIter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PowerIterationClustering"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`maxIter`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PowerIterationClustering.setWeightCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.PowerIterationClustering.html#pyspark.ml.clustering.PowerIterationClustering.setWeightCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setWeightCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PowerIterationClustering"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`weightCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">weightCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PowerIterationClustering.assignClusters"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.clustering.PowerIterationClustering.html#pyspark.ml.clustering.PowerIterationClustering.assignClusters">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">assignClusters</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Runs the PIC algorithm and returns a cluster assignment for each input vertex.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        dataset : :py:class:`pyspark.sql.DataFrame`</span> | 
|  | <span class="sd">          A dataset with columns src, dst, weight representing the affinity matrix,</span> | 
|  | <span class="sd">          which is the matrix A in the PIC paper. Suppose the src column value is i,</span> | 
|  | <span class="sd">          the dst column value is j, the weight column value is similarity s_ij,</span> | 
|  | <span class="sd">          which must be nonnegative. This is a symmetric matrix and hence</span> | 
|  | <span class="sd">          s_ij = s_ji. For any (i, j) with nonzero similarity, there should be</span> | 
|  | <span class="sd">          either (i, j, s_ij) or (j, i, s_ji) in the input. Rows with i = j are</span> | 
|  | <span class="sd">          ignored, because we assume s_ij = 0.0.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        :py:class:`pyspark.sql.DataFrame`</span> | 
|  | <span class="sd">            A dataset that contains columns of vertex id and the corresponding cluster for</span> | 
|  | <span class="sd">            the id. The schema of it will be:</span> | 
|  | <span class="sd">            - id: Long</span> | 
|  | <span class="sd">            - cluster: Int</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_transfer_params_to_java</span><span class="p">()</span> | 
|  | <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> | 
|  |  | 
|  | <span class="n">jdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span><span class="o">.</span><span class="n">assignClusters</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">_jdf</span><span class="p">)</span> | 
|  | <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">jdf</span><span class="p">,</span> <span class="n">dataset</span><span class="o">.</span><span class="n">sparkSession</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span> | 
|  | <span class="kn">import</span> <span class="nn">doctest</span> | 
|  | <span class="kn">import</span> <span class="nn">numpy</span> | 
|  | <span class="kn">import</span> <span class="nn">pyspark.ml.clustering</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span> | 
|  |  | 
|  | <span class="k">try</span><span class="p">:</span> | 
|  | <span class="c1"># Numpy 1.14+ changed its string format.</span> | 
|  | <span class="n">numpy</span><span class="o">.</span><span class="n">set_printoptions</span><span class="p">(</span><span class="n">legacy</span><span class="o">=</span><span class="s2">"1.13"</span><span class="p">)</span> | 
|  | <span class="k">except</span> <span class="ne">TypeError</span><span class="p">:</span> | 
|  | <span class="k">pass</span> | 
|  | <span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">ml</span><span class="o">.</span><span class="n">clustering</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> | 
|  | <span class="c1"># The small batch size here ensures that we see multiple batches,</span> | 
|  | <span class="c1"># even in these small test examples:</span> | 
|  | <span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">"local[2]"</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">"ml.clustering tests"</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span> | 
|  | <span class="n">sc</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">sparkContext</span> | 
|  | <span class="n">globs</span><span class="p">[</span><span class="s2">"sc"</span><span class="p">]</span> <span class="o">=</span> <span class="n">sc</span> | 
|  | <span class="n">globs</span><span class="p">[</span><span class="s2">"spark"</span><span class="p">]</span> <span class="o">=</span> <span class="n">spark</span> | 
|  | <span class="kn">import</span> <span class="nn">tempfile</span> | 
|  |  | 
|  | <span class="n">temp_path</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">mkdtemp</span><span class="p">()</span> | 
|  | <span class="n">globs</span><span class="p">[</span><span class="s2">"temp_path"</span><span class="p">]</span> <span class="o">=</span> <span class="n">temp_path</span> | 
|  | <span class="k">try</span><span class="p">:</span> | 
|  | <span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span><span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span> <span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span><span class="p">)</span> | 
|  | <span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span> | 
|  | <span class="k">finally</span><span class="p">:</span> | 
|  | <span class="kn">from</span> <span class="nn">shutil</span> <span class="kn">import</span> <span class="n">rmtree</span> | 
|  |  | 
|  | <span class="k">try</span><span class="p">:</span> | 
|  | <span class="n">rmtree</span><span class="p">(</span><span class="n">temp_path</span><span class="p">)</span> | 
|  | <span class="k">except</span> <span class="ne">OSError</span><span class="p">:</span> | 
|  | <span class="k">pass</span> | 
|  | <span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span> | 
|  | <span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> | 
|  | </pre></div> | 
|  |  | 
|  | </article> | 
|  |  | 
|  |  | 
|  |  | 
|  | <footer class="bd-footer-article"> | 
|  |  | 
|  | <div class="footer-article-items footer-article__inner"> | 
|  |  | 
|  | <div class="footer-article-item"><!-- Previous / next buttons --> | 
|  | <div class="prev-next-area"> | 
|  | </div></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </footer> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | </div> | 
|  | <footer class="bd-footer-content"> | 
|  |  | 
|  | </footer> | 
|  |  | 
|  | </main> | 
|  | </div> | 
|  | </div> | 
|  |  | 
|  | <!-- Scripts loaded after <body> so the DOM is not blocked --> | 
|  | <script src="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52"></script> | 
|  | <script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52"></script> | 
|  |  | 
|  | <footer class="bd-footer"> | 
|  | <div class="bd-footer__inner bd-page-width"> | 
|  |  | 
|  | <div class="footer-items__start"> | 
|  |  | 
|  | <div class="footer-item"><p class="copyright"> | 
|  | Copyright © 2024 The Apache Software Foundation, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>. | 
|  | </p></div> | 
|  |  | 
|  | <div class="footer-item"> | 
|  | <p class="sphinx-version"> | 
|  | Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.5.0. | 
|  | <br/> | 
|  | </p> | 
|  | </div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  | <div class="footer-items__end"> | 
|  |  | 
|  | <div class="footer-item"><p class="theme-version"> | 
|  | Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.13.3. | 
|  | </p></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </footer> | 
|  | </body> | 
|  | </html> |