|  |  | 
|  |  | 
|  | <!DOCTYPE html> | 
|  |  | 
|  |  | 
|  | <html lang="en"> | 
|  |  | 
|  | <head> | 
|  | <meta charset="utf-8" /> | 
|  | <meta name="viewport" content="width=device-width, initial-scale=1.0" /> | 
|  | <title>pyspark.ml.feature — PySpark 4.0.0-preview1 documentation</title> | 
|  |  | 
|  |  | 
|  |  | 
|  | <script data-cfasync="false"> | 
|  |   // Copy the saved "mode" and "theme" values from localStorage onto the root | 
|  |   // element as data-mode / data-theme, defaulting to "" and "light". | 
|  |   (function () { | 
|  |     var root = document.documentElement; | 
|  |     var savedMode = ""; | 
|  |     var savedTheme = "light"; | 
|  |     try { | 
|  |       savedMode = localStorage.getItem("mode") || savedMode; | 
|  |       savedTheme = localStorage.getItem("theme") || savedTheme; | 
|  |     } catch (err) { | 
|  |       // Reading localStorage can throw (for example a SecurityError when the | 
|  |       // browser blocks storage); keep the defaults so both attributes are still set. | 
|  |     } | 
|  |     root.dataset.mode = savedMode; | 
|  |     root.dataset.theme = savedTheme; | 
|  |   })(); | 
|  | </script> | 
|  |  | 
|  | <!-- Loaded before other Sphinx assets --> | 
|  | <link href="../../../_static/styles/theme.css?digest=e353d410970836974a52" rel="stylesheet" /> | 
|  | <link href="../../../_static/styles/bootstrap.css?digest=e353d410970836974a52" rel="stylesheet" /> | 
|  | <link href="../../../_static/styles/pydata-sphinx-theme.css?digest=e353d410970836974a52" rel="stylesheet" /> | 
|  |  | 
|  |  | 
|  | <link href="../../../_static/vendor/fontawesome/6.1.2/css/all.min.css?digest=e353d410970836974a52" rel="stylesheet" /> | 
|  | <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.woff2" /> | 
|  | <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.woff2" /> | 
|  | <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.woff2" /> | 
|  |  | 
|  | <link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" /> | 
|  | <link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" /> | 
|  | <link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" /> | 
|  |  | 
|  | <!-- Pre-loaded scripts that we'll load fully later --> | 
|  | <link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52" /> | 
|  | <link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52" /> | 
|  |  | 
|  | <script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script> | 
|  | <script src="../../../_static/jquery.js"></script> | 
|  | <script src="../../../_static/underscore.js"></script> | 
|  | <script src="../../../_static/doctools.js"></script> | 
|  | <script src="../../../_static/clipboard.min.js"></script> | 
|  | <script src="../../../_static/copybutton.js"></script> | 
|  | <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script> | 
|  | <script>DOCUMENTATION_OPTIONS.pagename = '_modules/pyspark/ml/feature';</script> | 
|  | <link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/_modules/pyspark/ml/feature.html" /> | 
|  | <link rel="search" title="Search" href="../../../search.html" /> | 
|  | <meta name="viewport" content="width=device-width, initial-scale=1" /> | 
|  | <meta name="docsearch:language" content="None"> | 
|  |  | 
|  |  | 
|  | <!-- Matomo --> | 
|  | <script type="text/javascript"> | 
|  | // Shared Matomo command queue; reuse it if another script already created it. | 
|  | var _paq = window._paq = window._paq || []; | 
|  | /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ | 
|  | _paq.push(["disableCookies"]); | 
|  | _paq.push(['trackPageView']); | 
|  | _paq.push(['enableLinkTracking']); | 
|  | // Queue the tracker endpoint and site id, then inject the async matomo.js loader | 
|  | // ahead of the first script element in the document. | 
|  | (function (doc) { | 
|  |   var trackerBase = "https://analytics.apache.org/"; | 
|  |   _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']); | 
|  |   _paq.push(['setSiteId', '40']); | 
|  |   var loader = doc.createElement('script'); | 
|  |   var firstScript = doc.getElementsByTagName('script')[0]; | 
|  |   loader.async = true; | 
|  |   loader.src = trackerBase + 'matomo.js'; | 
|  |   firstScript.parentNode.insertBefore(loader, firstScript); | 
|  | })(document); | 
|  | </script> | 
|  | <!-- End Matomo Code --> | 
|  |  | 
|  | </head> | 
|  |  | 
|  |  | 
|  | <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode=""> | 
|  |  | 
|  |  | 
|  |  | 
|  | <a class="skip-link" href="#main-content">Skip to main content</a> | 
|  |  | 
|  | <input type="checkbox" | 
|  | class="sidebar-toggle" | 
|  | name="__primary" | 
|  | id="__primary"/> | 
|  | <label class="overlay overlay-primary" for="__primary"></label> | 
|  |  | 
|  | <input type="checkbox" | 
|  | class="sidebar-toggle" | 
|  | name="__secondary" | 
|  | id="__secondary"/> | 
|  | <label class="overlay overlay-secondary" for="__secondary"></label> | 
|  |  | 
|  | <div class="search-button__wrapper"> | 
|  | <div class="search-button__overlay"></div> | 
|  | <div class="search-button__search-container"> | 
|  | <form class="bd-search d-flex align-items-center" | 
|  | action="../../../search.html" | 
|  | method="get"> | 
|  | <i class="fa-solid fa-magnifying-glass"></i> | 
|  | <input type="search" | 
|  | class="form-control" | 
|  | name="q" | 
|  | id="search-input" | 
|  | placeholder="Search the docs ..." | 
|  | aria-label="Search the docs ..." | 
|  | autocomplete="off" | 
|  | autocorrect="off" | 
|  | autocapitalize="off" | 
|  | spellcheck="false"/> | 
|  | <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span> | 
|  | </form></div> | 
|  | </div> | 
|  |  | 
|  | <nav class="bd-header navbar navbar-expand-lg bd-navbar"> | 
|  | <div class="bd-header__inner bd-page-width"> | 
|  | <label class="sidebar-toggle primary-toggle" for="__primary"> | 
|  | <span class="fa-solid fa-bars"></span> | 
|  | </label> | 
|  |  | 
|  | <div class="navbar-header-items__start"> | 
|  |  | 
|  | <div class="navbar-item"> | 
|  |  | 
|  |  | 
|  | <a class="navbar-brand logo" href="../../../index.html"> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | <img src="../../../_static/spark-logo-light.png" class="logo__image only-light" alt="Logo image"/> | 
|  | <script>document.write(`<img src="../../../_static/spark-logo-dark.png" class="logo__image only-dark" alt="Logo image"/>`);</script> | 
|  |  | 
|  |  | 
|  | </a></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  | <div class="col-lg-9 navbar-header-items"> | 
|  |  | 
|  | <div class="me-auto navbar-header-items__center"> | 
|  |  | 
|  | <div class="navbar-item"><nav class="navbar-nav"> | 
|  | <p class="sidebar-header-items__title" | 
|  | role="heading" | 
|  | aria-level="1" | 
|  | aria-label="Site Navigation"> | 
|  | Site Navigation | 
|  | </p> | 
|  | <ul class="bd-navbar-elements navbar-nav"> | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../index.html"> | 
|  | Overview | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../getting_started/index.html"> | 
|  | Getting Started | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../user_guide/index.html"> | 
|  | User Guides | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../reference/index.html"> | 
|  | API Reference | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../development/index.html"> | 
|  | Development | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../migration_guide/index.html"> | 
|  | Migration Guides | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  | </ul> | 
|  | </nav></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  | <div class="navbar-header-items__end"> | 
|  |  | 
|  | <div class="navbar-item navbar-persistent--container"> | 
|  |  | 
|  | <script> | 
|  | document.write(` | 
|  | <button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> | 
|  | <i class="fa-solid fa-magnifying-glass"></i> | 
|  | </button> | 
|  | `); | 
|  | </script> | 
|  | </div> | 
|  |  | 
|  |  | 
|  | <div class="navbar-item"><!-- | 
|  | Licensed to the Apache Software Foundation (ASF) under one or more | 
|  | contributor license agreements.  See the NOTICE file distributed with | 
|  | this work for additional information regarding copyright ownership. | 
|  | The ASF licenses this file to You under the Apache License, Version 2.0 | 
|  | (the "License"); you may not use this file except in compliance with | 
|  | the License.  You may obtain a copy of the License at | 
|  |  | 
|  | http://www.apache.org/licenses/LICENSE-2.0 | 
|  |  | 
|  | Unless required by applicable law or agreed to in writing, software | 
|  | distributed under the License is distributed on an "AS IS" BASIS, | 
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | See the License for the specific language governing permissions and | 
|  | limitations under the License. | 
|  | --> | 
|  |  | 
|  | <div id="version-button" class="dropdown"> | 
|  | <button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown"> | 
|  | 4.0.0-preview1 | 
|  | <span class="caret"></span> | 
|  | </button> | 
|  | <div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button"> | 
|  | <!-- dropdown will be populated by javascript on page load --> | 
|  | </div> | 
|  | </div> | 
|  |  | 
|  | <script type="text/javascript"> | 
|  | // Build the docs homepage URL for one entry of the versions JSON by | 
|  | // substituting the entry's version string into the URL pattern. | 
|  | function buildURL(entry) { | 
|  |   const urlPattern = "https://spark.apache.org/docs/{version}/api/python/index.html";  // supplied by jinja | 
|  |   return urlPattern.replace("{version}", entry.version); | 
|  | } | 
|  |  | 
|  | // Click handler for a version link: check whether the current page also exists | 
|  | // in the selected docs version and, if so, go there instead of that version's | 
|  | // homepage. Always returns false so the browser does not follow the link itself; | 
|  | // navigation happens once the HEAD request settles. | 
|  | function checkPageExistsAndRedirect(event) { | 
|  |   const currentFilePath = "_modules/pyspark/ml/feature.html"; | 
|  |   const otherDocsHomepage = event.target.getAttribute("href"); | 
|  |   // The homepage href produced by buildURL ends in "index.html"; drop that file | 
|  |   // name so the page path is appended to the docs root directory rather than | 
|  |   // glued onto "index.html" (which yields a URL that can never exist). | 
|  |   const otherDocsRoot = otherDocsHomepage.replace(/index\.html$/, ""); | 
|  |   const tryUrl = `${otherDocsRoot}${currentFilePath}`; | 
|  |   $.ajax({ type: 'HEAD', url: tryUrl }) | 
|  |     // if the page exists, go there | 
|  |     .done(function () { | 
|  |       location.href = tryUrl; | 
|  |     }) | 
|  |     // otherwise fall back to the other version's homepage | 
|  |     .fail(function () { | 
|  |       location.href = otherDocsHomepage; | 
|  |     }); | 
|  |   return false; | 
|  | } | 
|  |  | 
|  | // Populate this version switcher's dropdown from the published versions list. | 
|  | (function () { | 
|  |   // The switcher markup is rendered in both the header and the sidebar with the | 
|  |   // same ids, and an id selector only matches the first copy. Resolve the menu | 
|  |   // that sits next to this script instead; fall back to the id lookup if the | 
|  |   // running script element is not available. | 
|  |   const host = document.currentScript ? document.currentScript.parentNode : null; | 
|  |   const ownMenu = host ? host.querySelector(".dropdown-menu") : null; | 
|  |   const $menu = ownMenu ? $(ownMenu) : $("#version_switcher"); | 
|  |   // get JSON config | 
|  |   $.getJSON("https://spark.apache.org/static/versions.json", function (data) { | 
|  |     // create the nodes in JSON order; each link points at that version's docs | 
|  |     // homepage and the click handler tries the equivalent page first | 
|  |     $.each(data, function (index, entry) { | 
|  |       // if no custom name specified (e.g., "latest"), use version string | 
|  |       if (!("name" in entry)) { | 
|  |         entry.name = entry.version; | 
|  |       } | 
|  |       // construct the appropriate URL, and add it to the dropdown | 
|  |       entry.url = buildURL(entry); | 
|  |       const node = document.createElement("a"); | 
|  |       node.setAttribute("class", "list-group-item list-group-item-action py-1"); | 
|  |       node.setAttribute("href", `${entry.url}`); | 
|  |       node.textContent = `${entry.name}`; | 
|  |       node.onclick = checkPageExistsAndRedirect; | 
|  |       $menu.append(node); | 
|  |     }); | 
|  |   }); | 
|  | })(); | 
|  | </script></div> | 
|  |  | 
|  | <div class="navbar-item"> | 
|  | <script> | 
|  | document.write(` | 
|  | <button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip"> | 
|  | <span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span> | 
|  | <span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span> | 
|  | <span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span> | 
|  | </button> | 
|  | `); | 
|  | </script></div> | 
|  |  | 
|  | <div class="navbar-item"><ul class="navbar-icon-links navbar-nav" | 
|  | aria-label="Icon Links"> | 
|  | <li class="nav-item"> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | <a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github"></i></span> | 
|  | <label class="sr-only">GitHub</label></a> | 
|  | </li> | 
|  | <li class="nav-item"> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | <a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box"></i></span> | 
|  | <label class="sr-only">PyPI</label></a> | 
|  | </li> | 
|  | </ul></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  | <div class="navbar-persistent--mobile"> | 
|  | <script> | 
|  | document.write(` | 
|  | <button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> | 
|  | <i class="fa-solid fa-magnifying-glass"></i> | 
|  | </button> | 
|  | `); | 
|  | </script> | 
|  | </div> | 
|  |  | 
|  |  | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </nav> | 
|  |  | 
|  | <div class="bd-container"> | 
|  | <div class="bd-container__inner bd-page-width"> | 
|  |  | 
|  | <div class="bd-sidebar-primary bd-sidebar hide-on-wide"> | 
|  |  | 
|  |  | 
|  |  | 
|  | <div class="sidebar-header-items sidebar-primary__section"> | 
|  |  | 
|  |  | 
|  | <div class="sidebar-header-items__center"> | 
|  |  | 
|  | <div class="navbar-item"><nav class="navbar-nav"> | 
|  | <p class="sidebar-header-items__title" | 
|  | role="heading" | 
|  | aria-level="1" | 
|  | aria-label="Site Navigation"> | 
|  | Site Navigation | 
|  | </p> | 
|  | <ul class="bd-navbar-elements navbar-nav"> | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../index.html"> | 
|  | Overview | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../getting_started/index.html"> | 
|  | Getting Started | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../user_guide/index.html"> | 
|  | User Guides | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../reference/index.html"> | 
|  | API Reference | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../development/index.html"> | 
|  | Development | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  |  | 
|  | <li class="nav-item"> | 
|  | <a class="nav-link nav-internal" href="../../../migration_guide/index.html"> | 
|  | Migration Guides | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  | </ul> | 
|  | </nav></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  |  | 
|  | <div class="sidebar-header-items__end"> | 
|  |  | 
|  | <div class="navbar-item"><!-- | 
|  | Licensed to the Apache Software Foundation (ASF) under one or more | 
|  | contributor license agreements.  See the NOTICE file distributed with | 
|  | this work for additional information regarding copyright ownership. | 
|  | The ASF licenses this file to You under the Apache License, Version 2.0 | 
|  | (the "License"); you may not use this file except in compliance with | 
|  | the License.  You may obtain a copy of the License at | 
|  |  | 
|  | http://www.apache.org/licenses/LICENSE-2.0 | 
|  |  | 
|  | Unless required by applicable law or agreed to in writing, software | 
|  | distributed under the License is distributed on an "AS IS" BASIS, | 
|  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | See the License for the specific language governing permissions and | 
|  | limitations under the License. | 
|  | --> | 
|  |  | 
|  | <div id="version-button" class="dropdown"> | 
|  | <button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown"> | 
|  | 4.0.0-preview1 | 
|  | <span class="caret"></span> | 
|  | </button> | 
|  | <div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button"> | 
|  | <!-- dropdown will be populated by javascript on page load --> | 
|  | </div> | 
|  | </div> | 
|  |  | 
|  | <script type="text/javascript"> | 
|  | // Build the docs homepage URL for one entry of the versions JSON by | 
|  | // substituting the entry's version string into the URL pattern. | 
|  | function buildURL(entry) { | 
|  |   const urlPattern = "https://spark.apache.org/docs/{version}/api/python/index.html";  // supplied by jinja | 
|  |   return urlPattern.replace("{version}", entry.version); | 
|  | } | 
|  |  | 
|  | // Click handler for a version link: check whether the current page also exists | 
|  | // in the selected docs version and, if so, go there instead of that version's | 
|  | // homepage. Always returns false so the browser does not follow the link itself; | 
|  | // navigation happens once the HEAD request settles. | 
|  | function checkPageExistsAndRedirect(event) { | 
|  |   const currentFilePath = "_modules/pyspark/ml/feature.html"; | 
|  |   const otherDocsHomepage = event.target.getAttribute("href"); | 
|  |   // The homepage href produced by buildURL ends in "index.html"; drop that file | 
|  |   // name so the page path is appended to the docs root directory rather than | 
|  |   // glued onto "index.html" (which yields a URL that can never exist). | 
|  |   const otherDocsRoot = otherDocsHomepage.replace(/index\.html$/, ""); | 
|  |   const tryUrl = `${otherDocsRoot}${currentFilePath}`; | 
|  |   $.ajax({ type: 'HEAD', url: tryUrl }) | 
|  |     // if the page exists, go there | 
|  |     .done(function () { | 
|  |       location.href = tryUrl; | 
|  |     }) | 
|  |     // otherwise fall back to the other version's homepage | 
|  |     .fail(function () { | 
|  |       location.href = otherDocsHomepage; | 
|  |     }); | 
|  |   return false; | 
|  | } | 
|  |  | 
|  | // Populate this version switcher's dropdown from the published versions list. | 
|  | (function () { | 
|  |   // The switcher markup is rendered in both the header and the sidebar with the | 
|  |   // same ids, and an id selector only matches the first copy. Resolve the menu | 
|  |   // that sits next to this script instead; fall back to the id lookup if the | 
|  |   // running script element is not available. | 
|  |   const host = document.currentScript ? document.currentScript.parentNode : null; | 
|  |   const ownMenu = host ? host.querySelector(".dropdown-menu") : null; | 
|  |   const $menu = ownMenu ? $(ownMenu) : $("#version_switcher"); | 
|  |   // get JSON config | 
|  |   $.getJSON("https://spark.apache.org/static/versions.json", function (data) { | 
|  |     // create the nodes in JSON order; each link points at that version's docs | 
|  |     // homepage and the click handler tries the equivalent page first | 
|  |     $.each(data, function (index, entry) { | 
|  |       // if no custom name specified (e.g., "latest"), use version string | 
|  |       if (!("name" in entry)) { | 
|  |         entry.name = entry.version; | 
|  |       } | 
|  |       // construct the appropriate URL, and add it to the dropdown | 
|  |       entry.url = buildURL(entry); | 
|  |       const node = document.createElement("a"); | 
|  |       node.setAttribute("class", "list-group-item list-group-item-action py-1"); | 
|  |       node.setAttribute("href", `${entry.url}`); | 
|  |       node.textContent = `${entry.name}`; | 
|  |       node.onclick = checkPageExistsAndRedirect; | 
|  |       $menu.append(node); | 
|  |     }); | 
|  |   }); | 
|  | })(); | 
|  | </script></div> | 
|  |  | 
|  | <div class="navbar-item"> | 
|  | <script> | 
|  | document.write(` | 
|  | <button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip"> | 
|  | <span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span> | 
|  | <span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span> | 
|  | <span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span> | 
|  | </button> | 
|  | `); | 
|  | </script></div> | 
|  |  | 
|  | <div class="navbar-item"><ul class="navbar-icon-links navbar-nav" | 
|  | aria-label="Icon Links"> | 
|  | <li class="nav-item"> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | <a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github"></i></span> | 
|  | <label class="sr-only">GitHub</label></a> | 
|  | </li> | 
|  | <li class="nav-item"> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | <a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box"></i></span> | 
|  | <label class="sr-only">PyPI</label></a> | 
|  | </li> | 
|  | </ul></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  | <div class="sidebar-primary-items__end sidebar-primary__section"> | 
|  | </div> | 
|  |  | 
|  | <div id="rtd-footer-container"></div> | 
|  |  | 
|  |  | 
|  | </div> | 
|  |  | 
|  | <main id="main-content" class="bd-main"> | 
|  |  | 
|  |  | 
|  | <div class="bd-content"> | 
|  | <div class="bd-article-container"> | 
|  |  | 
|  | <div class="bd-header-article"> | 
|  | <div class="header-article-items header-article__inner"> | 
|  |  | 
|  | <div class="header-article-items__start"> | 
|  |  | 
|  | <div class="header-article-item"> | 
|  |  | 
|  |  | 
|  |  | 
|  | <nav aria-label="Breadcrumbs"> | 
|  | <ul class="bd-breadcrumbs"> | 
|  |  | 
|  | <li class="breadcrumb-item breadcrumb-home"> | 
|  | <a href="../../../index.html" class="nav-link" aria-label="Home"> | 
|  | <i class="fa-solid fa-home"></i> | 
|  | </a> | 
|  | </li> | 
|  |  | 
|  | <li class="breadcrumb-item"><a href="../../index.html" class="nav-link">Module code</a></li> | 
|  |  | 
|  | <li class="breadcrumb-item active" aria-current="page">pyspark.ml.feature</li> | 
|  | </ul> | 
|  | </nav> | 
|  | </div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  | </div> | 
|  | </div> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | <div id="searchbox"></div> | 
|  | <article class="bd-article" role="main"> | 
|  |  | 
|  | <h1>Source code for pyspark.ml.feature</h1><div class="highlight"><pre> | 
|  | <span></span><span class="c1">#</span> | 
|  | <span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span> | 
|  | <span class="c1"># contributor license agreements.  See the NOTICE file distributed with</span> | 
|  | <span class="c1"># this work for additional information regarding copyright ownership.</span> | 
|  | <span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span> | 
|  | <span class="c1"># (the "License"); you may not use this file except in compliance with</span> | 
|  | <span class="c1"># the License.  You may obtain a copy of the License at</span> | 
|  | <span class="c1">#</span> | 
|  | <span class="c1">#    http://www.apache.org/licenses/LICENSE-2.0</span> | 
|  | <span class="c1">#</span> | 
|  | <span class="c1"># Unless required by applicable law or agreed to in writing, software</span> | 
|  | <span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span> | 
|  | <span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> | 
|  | <span class="c1"># See the License for the specific language governing permissions and</span> | 
|  | <span class="c1"># limitations under the License.</span> | 
|  | <span class="c1">#</span> | 
|  | <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="p">(</span> | 
|  | <span class="n">cast</span><span class="p">,</span> | 
|  | <span class="n">overload</span><span class="p">,</span> | 
|  | <span class="n">Any</span><span class="p">,</span> | 
|  | <span class="n">Dict</span><span class="p">,</span> | 
|  | <span class="n">Generic</span><span class="p">,</span> | 
|  | <span class="n">List</span><span class="p">,</span> | 
|  | <span class="n">Optional</span><span class="p">,</span> | 
|  | <span class="n">Tuple</span><span class="p">,</span> | 
|  | <span class="n">TypeVar</span><span class="p">,</span> | 
|  | <span class="n">Union</span><span class="p">,</span> | 
|  | <span class="n">TYPE_CHECKING</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">keyword_only</span><span class="p">,</span> <span class="n">since</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.ml.linalg</span> <span class="kn">import</span> <span class="n">_convert_to_vector</span><span class="p">,</span> <span class="n">DenseMatrix</span><span class="p">,</span> <span class="n">DenseVector</span><span class="p">,</span> <span class="n">Vector</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.sql.dataframe</span> <span class="kn">import</span> <span class="n">DataFrame</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.ml.param.shared</span> <span class="kn">import</span> <span class="p">(</span> | 
|  | <span class="n">HasThreshold</span><span class="p">,</span> | 
|  | <span class="n">HasThresholds</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">HasInputCols</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCols</span><span class="p">,</span> | 
|  | <span class="n">HasHandleInvalid</span><span class="p">,</span> | 
|  | <span class="n">HasRelativeError</span><span class="p">,</span> | 
|  | <span class="n">HasFeaturesCol</span><span class="p">,</span> | 
|  | <span class="n">HasLabelCol</span><span class="p">,</span> | 
|  | <span class="n">HasSeed</span><span class="p">,</span> | 
|  | <span class="n">HasNumFeatures</span><span class="p">,</span> | 
|  | <span class="n">HasStepSize</span><span class="p">,</span> | 
|  | <span class="n">HasMaxIter</span><span class="p">,</span> | 
|  | <span class="n">TypeConverters</span><span class="p">,</span> | 
|  | <span class="n">Param</span><span class="p">,</span> | 
|  | <span class="n">Params</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.ml.util</span> <span class="kn">import</span> <span class="n">JavaMLReadable</span><span class="p">,</span> <span class="n">JavaMLWritable</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.ml.wrapper</span> <span class="kn">import</span> <span class="n">JavaEstimator</span><span class="p">,</span> <span class="n">JavaModel</span><span class="p">,</span> <span class="n">JavaParams</span><span class="p">,</span> <span class="n">JavaTransformer</span><span class="p">,</span> <span class="n">_jvm</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.ml.common</span> <span class="kn">import</span> <span class="n">inherit_doc</span> | 
|  |  | 
|  | <span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> | 
|  | <span class="kn">from</span> <span class="nn">py4j.java_gateway</span> <span class="kn">import</span> <span class="n">JavaObject</span> | 
|  |  | 
|  | <span class="n">JM</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">"JM"</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="n">JavaTransformer</span><span class="p">)</span> | 
|  | <span class="n">P</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">"P"</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="n">Params</span><span class="p">)</span> | 
|  |  | 
|  | <span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span> | 
|  | <span class="s2">"Binarizer"</span><span class="p">,</span> | 
|  | <span class="s2">"BucketedRandomProjectionLSH"</span><span class="p">,</span> | 
|  | <span class="s2">"BucketedRandomProjectionLSHModel"</span><span class="p">,</span> | 
|  | <span class="s2">"Bucketizer"</span><span class="p">,</span> | 
|  | <span class="s2">"ChiSqSelector"</span><span class="p">,</span> | 
|  | <span class="s2">"ChiSqSelectorModel"</span><span class="p">,</span> | 
|  | <span class="s2">"CountVectorizer"</span><span class="p">,</span> | 
|  | <span class="s2">"CountVectorizerModel"</span><span class="p">,</span> | 
|  | <span class="s2">"DCT"</span><span class="p">,</span> | 
|  | <span class="s2">"ElementwiseProduct"</span><span class="p">,</span> | 
|  | <span class="s2">"FeatureHasher"</span><span class="p">,</span> | 
|  | <span class="s2">"HashingTF"</span><span class="p">,</span> | 
|  | <span class="s2">"IDF"</span><span class="p">,</span> | 
|  | <span class="s2">"IDFModel"</span><span class="p">,</span> | 
|  | <span class="s2">"Imputer"</span><span class="p">,</span> | 
|  | <span class="s2">"ImputerModel"</span><span class="p">,</span> | 
|  | <span class="s2">"IndexToString"</span><span class="p">,</span> | 
|  | <span class="s2">"Interaction"</span><span class="p">,</span> | 
|  | <span class="s2">"MaxAbsScaler"</span><span class="p">,</span> | 
|  | <span class="s2">"MaxAbsScalerModel"</span><span class="p">,</span> | 
|  | <span class="s2">"MinHashLSH"</span><span class="p">,</span> | 
|  | <span class="s2">"MinHashLSHModel"</span><span class="p">,</span> | 
|  | <span class="s2">"MinMaxScaler"</span><span class="p">,</span> | 
|  | <span class="s2">"MinMaxScalerModel"</span><span class="p">,</span> | 
|  | <span class="s2">"NGram"</span><span class="p">,</span> | 
|  | <span class="s2">"Normalizer"</span><span class="p">,</span> | 
|  | <span class="s2">"OneHotEncoder"</span><span class="p">,</span> | 
|  | <span class="s2">"OneHotEncoderModel"</span><span class="p">,</span> | 
|  | <span class="s2">"PCA"</span><span class="p">,</span> | 
|  | <span class="s2">"PCAModel"</span><span class="p">,</span> | 
|  | <span class="s2">"PolynomialExpansion"</span><span class="p">,</span> | 
|  | <span class="s2">"QuantileDiscretizer"</span><span class="p">,</span> | 
|  | <span class="s2">"RobustScaler"</span><span class="p">,</span> | 
|  | <span class="s2">"RobustScalerModel"</span><span class="p">,</span> | 
|  | <span class="s2">"RegexTokenizer"</span><span class="p">,</span> | 
|  | <span class="s2">"RFormula"</span><span class="p">,</span> | 
|  | <span class="s2">"RFormulaModel"</span><span class="p">,</span> | 
|  | <span class="s2">"SQLTransformer"</span><span class="p">,</span> | 
|  | <span class="s2">"StandardScaler"</span><span class="p">,</span> | 
|  | <span class="s2">"StandardScalerModel"</span><span class="p">,</span> | 
|  | <span class="s2">"StopWordsRemover"</span><span class="p">,</span> | 
|  | <span class="s2">"StringIndexer"</span><span class="p">,</span> | 
|  | <span class="s2">"StringIndexerModel"</span><span class="p">,</span> | 
|  | <span class="s2">"Tokenizer"</span><span class="p">,</span> | 
|  | <span class="s2">"UnivariateFeatureSelector"</span><span class="p">,</span> | 
|  | <span class="s2">"UnivariateFeatureSelectorModel"</span><span class="p">,</span> | 
|  | <span class="s2">"VarianceThresholdSelector"</span><span class="p">,</span> | 
|  | <span class="s2">"VarianceThresholdSelectorModel"</span><span class="p">,</span> | 
|  | <span class="s2">"VectorAssembler"</span><span class="p">,</span> | 
|  | <span class="s2">"VectorIndexer"</span><span class="p">,</span> | 
|  | <span class="s2">"VectorIndexerModel"</span><span class="p">,</span> | 
|  | <span class="s2">"VectorSizeHint"</span><span class="p">,</span> | 
|  | <span class="s2">"VectorSlicer"</span><span class="p">,</span> | 
|  | <span class="s2">"Word2Vec"</span><span class="p">,</span> | 
|  | <span class="s2">"Word2VecModel"</span><span class="p">,</span> | 
|  | <span class="p">]</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="Binarizer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Binarizer.html#pyspark.ml.feature.Binarizer">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">Binarizer</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasThreshold</span><span class="p">,</span> | 
|  | <span class="n">HasThresholds</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">HasInputCols</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCols</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"Binarizer"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Binarize a column of continuous features given a threshold. Since 3.0.0,</span> | 
|  | <span class="sd">    :py:class:`Binarizer` can map multiple columns at once by setting the :py:attr:`inputCols`</span> | 
|  | <span class="sd">    parameter. Note that when both the :py:attr:`inputCol` and :py:attr:`inputCols` parameters</span> | 
|  | <span class="sd">    are set, an Exception will be thrown. The :py:attr:`threshold` parameter is used for</span> | 
|  | <span class="sd">    single column usage, and :py:attr:`thresholds` is for multiple columns.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(0.5,)], ["values"])</span> | 
|  | <span class="sd">    >>> binarizer = Binarizer(threshold=1.0, inputCol="values", outputCol="features")</span> | 
|  | <span class="sd">    >>> binarizer.setThreshold(1.0)</span> | 
|  | <span class="sd">    Binarizer...</span> | 
|  | <span class="sd">    >>> binarizer.setInputCol("values")</span> | 
|  | <span class="sd">    Binarizer...</span> | 
|  | <span class="sd">    >>> binarizer.setOutputCol("features")</span> | 
|  | <span class="sd">    Binarizer...</span> | 
|  | <span class="sd">    >>> binarizer.transform(df).head().features</span> | 
|  | <span class="sd">    0.0</span> | 
|  | <span class="sd">    >>> binarizer.setParams(outputCol="freqs").transform(df).head().freqs</span> | 
|  | <span class="sd">    0.0</span> | 
|  | <span class="sd">    >>> params = {binarizer.threshold: -0.5, binarizer.outputCol: "vector"}</span> | 
|  | <span class="sd">    >>> binarizer.transform(df, params).head().vector</span> | 
|  | <span class="sd">    1.0</span> | 
|  | <span class="sd">    >>> binarizerPath = temp_path + "/binarizer"</span> | 
|  | <span class="sd">    >>> binarizer.save(binarizerPath)</span> | 
|  | <span class="sd">    >>> loadedBinarizer = Binarizer.load(binarizerPath)</span> | 
|  | <span class="sd">    >>> loadedBinarizer.getThreshold() == binarizer.getThreshold()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedBinarizer.transform(df).take(1) == binarizer.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> df2 = spark.createDataFrame([(0.5, 0.3)], ["values1", "values2"])</span> | 
|  | <span class="sd">    >>> binarizer2 = Binarizer(thresholds=[0.0, 1.0])</span> | 
|  | <span class="sd">    >>> binarizer2.setInputCols(["values1", "values2"]).setOutputCols(["output1", "output2"])</span> | 
|  | <span class="sd">    Binarizer...</span> | 
|  | <span class="sd">    >>> binarizer2.transform(df2).show()</span> | 
|  | <span class="sd">    +-------+-------+-------+-------+</span> | 
|  | <span class="sd">    |values1|values2|output1|output2|</span> | 
|  | <span class="sd">    +-------+-------+-------+-------+</span> | 
|  | <span class="sd">    |    0.5|    0.3|    1.0|    0.0|</span> | 
|  | <span class="sd">    +-------+-------+-------+-------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">threshold</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"threshold"</span><span class="p">,</span> | 
|  | <span class="s2">"Param for threshold used to binarize continuous features. "</span> | 
|  | <span class="o">+</span> <span class="s2">"The features greater than the threshold will be binarized to 1.0. "</span> | 
|  | <span class="o">+</span> <span class="s2">"The features equal to or less than the threshold will be binarized to 0.0"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">thresholds</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"thresholds"</span><span class="p">,</span> | 
|  | <span class="s2">"Param for array of threshold used to binarize continuous features. "</span> | 
|  | <span class="o">+</span> <span class="s2">"This is for multiple columns input. If transforming multiple columns "</span> | 
|  | <span class="o">+</span> <span class="s2">"and thresholds is not set, but threshold is set, then threshold will "</span> | 
|  | <span class="o">+</span> <span class="s2">"be applied across all columns."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toListFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">threshold</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">thresholds</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">threshold</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">thresholds</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, threshold=0.0, inputCol=None, outputCol=None, thresholds=None, \</span> | 
|  | <span class="sd">                 inputCols=None, outputCols=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">Binarizer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.Binarizer"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">threshold</span><span class="o">=</span><span class="mf">0.0</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">threshold</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Binarizer"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">thresholds</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Binarizer"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <div class="viewcode-block" id="Binarizer.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Binarizer.html#pyspark.ml.feature.Binarizer.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">threshold</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">thresholds</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Binarizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, threshold=0.0, inputCol=None, outputCol=None, thresholds=None, \</span> | 
|  | <span class="sd">                  inputCols=None, outputCols=None)</span> | 
|  | <span class="sd">        Sets params for this Binarizer.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Binarizer.setThreshold"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Binarizer.html#pyspark.ml.feature.Binarizer.setThreshold">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setThreshold</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Binarizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`threshold`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">threshold</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Binarizer.setThresholds"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Binarizer.html#pyspark.ml.feature.Binarizer.setThresholds">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setThresholds</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Binarizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`thresholds`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">thresholds</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Binarizer.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Binarizer.html#pyspark.ml.feature.Binarizer.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Binarizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Binarizer.setInputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Binarizer.html#pyspark.ml.feature.Binarizer.setInputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Binarizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Binarizer.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Binarizer.html#pyspark.ml.feature.Binarizer.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Binarizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Binarizer.setOutputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Binarizer.html#pyspark.ml.feature.Binarizer.setOutputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Binarizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_LSHParams</span><span class="p">(</span><span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Mixin for Locality Sensitive Hashing (LSH) algorithm parameters.</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">numHashTables</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"numHashTables"</span><span class="p">,</span> | 
|  | <span class="s2">"number of hash tables, where "</span> | 
|  | <span class="o">+</span> <span class="s2">"increasing number of hash tables lowers the false negative rate, "</span> | 
|  | <span class="o">+</span> <span class="s2">"and decreasing it improves the running performance."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_LSHParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">numHashTables</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">getNumHashTables</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of numHashTables or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">numHashTables</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_LSH</span><span class="p">(</span><span class="n">JavaEstimator</span><span class="p">[</span><span class="n">JM</span><span class="p">],</span> <span class="n">_LSHParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">,</span> <span class="n">JavaMLWritable</span><span class="p">,</span> <span class="n">Generic</span><span class="p">[</span><span class="n">JM</span><span class="p">]):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Mixin for Locality Sensitive Hashing (LSH).</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">setNumHashTables</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`numHashTables`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">numHashTables</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_LSHModel</span><span class="p">(</span><span class="n">JavaModel</span><span class="p">,</span> <span class="n">_LSHParams</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Mixin for Locality Sensitive Hashing (LSH) models.</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">approxNearestNeighbors</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> | 
|  | <span class="n">key</span><span class="p">:</span> <span class="n">Vector</span><span class="p">,</span> | 
|  | <span class="n">numNearestNeighbors</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> | 
|  | <span class="n">distCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"distCol"</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Given a large dataset and an item, approximately find at most numNearestNeighbors items which have the</span> | 
|  | <span class="sd">        closest distance to the item. If the :py:attr:`outputCol` is missing, the method will</span> | 
|  | <span class="sd">        transform the data; if the :py:attr:`outputCol` exists, it will use that. This allows</span> | 
|  | <span class="sd">        caching of the transformed data when necessary.</span> | 
|  |  | 
|  | <span class="sd">        Notes</span> | 
|  | <span class="sd">        -----</span> | 
|  | <span class="sd">        This method is experimental and will likely change behavior in the next release.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        dataset : :py:class:`pyspark.sql.DataFrame`</span> | 
|  | <span class="sd">            The dataset to search for nearest neighbors of the key.</span> | 
|  | <span class="sd">        key : :py:class:`pyspark.ml.linalg.Vector`</span> | 
|  | <span class="sd">            Feature vector representing the item to search for.</span> | 
|  | <span class="sd">        numNearestNeighbors : int</span> | 
|  | <span class="sd">            The maximum number of nearest neighbors.</span> | 
|  | <span class="sd">        distCol : str, optional</span> | 
|  | <span class="sd">            Output column for storing the distance between each result row and the key.</span> | 
|  | <span class="sd">            Use "distCol" as default value if it's not specified.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        :py:class:`pyspark.sql.DataFrame`</span> | 
|  | <span class="sd">            A dataset containing at most numNearestNeighbors items closest to the key. A column "distCol" is</span> | 
|  | <span class="sd">            added to show the distance between each row and the key.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"approxNearestNeighbors"</span><span class="p">,</span> <span class="n">dataset</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">numNearestNeighbors</span><span class="p">,</span> <span class="n">distCol</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">approxSimilarityJoin</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="n">datasetA</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> | 
|  | <span class="n">datasetB</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> | 
|  | <span class="n">threshold</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> | 
|  | <span class="n">distCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"distCol"</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Join two datasets to approximately find all pairs of rows whose distance is smaller than</span> | 
|  | <span class="sd">        the threshold. If the :py:attr:`outputCol` is missing, the method will transform the data;</span> | 
|  | <span class="sd">        if the :py:attr:`outputCol` exists, it will use that. This allows caching of the</span> | 
|  | <span class="sd">        transformed data when necessary.</span> | 
|  |  | 
|  | <span class="sd">        Parameters</span> | 
|  | <span class="sd">        ----------</span> | 
|  | <span class="sd">        datasetA : :py:class:`pyspark.sql.DataFrame`</span> | 
|  | <span class="sd">            One of the datasets to join.</span> | 
|  | <span class="sd">        datasetB : :py:class:`pyspark.sql.DataFrame`</span> | 
|  | <span class="sd">            Another dataset to join.</span> | 
|  | <span class="sd">        threshold : float</span> | 
|  | <span class="sd">            The threshold for the distance of row pairs.</span> | 
|  | <span class="sd">        distCol : str, optional</span> | 
|  | <span class="sd">            Output column for storing the distance between each pair of rows. Use</span> | 
|  | <span class="sd">            "distCol" as default value if it's not specified.</span> | 
|  |  | 
|  | <span class="sd">        Returns</span> | 
|  | <span class="sd">        -------</span> | 
|  | <span class="sd">        :py:class:`pyspark.sql.DataFrame`</span> | 
|  | <span class="sd">            A joined dataset containing pairs of rows. The original rows are in columns</span> | 
|  | <span class="sd">            "datasetA" and "datasetB", and a column "distCol" is added to show the distance</span> | 
|  | <span class="sd">            between each pair.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">threshold</span> <span class="o">=</span> <span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">(</span><span class="n">threshold</span><span class="p">)</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"approxSimilarityJoin"</span><span class="p">,</span> <span class="n">datasetA</span><span class="p">,</span> <span class="n">datasetB</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">distCol</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_BucketedRandomProjectionLSHParams</span><span class="p">:</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`BucketedRandomProjectionLSH` and</span> | 
|  | <span class="sd">    :py:class:`BucketedRandomProjectionLSHModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">bucketLength</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"bucketLength"</span><span class="p">,</span> | 
|  | <span class="s2">"the length of each hash bucket, "</span> <span class="o">+</span> <span class="s2">"a larger bucket lowers the false negative rate."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getBucketLength</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of bucketLength or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="p">(</span><span class="n">cast</span><span class="p">(</span><span class="n">Params</span><span class="p">,</span> <span class="bp">self</span><span class="p">))</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">bucketLength</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="BucketedRandomProjectionLSH"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.BucketedRandomProjectionLSH.html#pyspark.ml.feature.BucketedRandomProjectionLSH">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">BucketedRandomProjectionLSH</span><span class="p">(</span> | 
|  | <span class="n">_LSH</span><span class="p">[</span><span class="s2">"BucketedRandomProjectionLSHModel"</span><span class="p">],</span> | 
|  | <span class="n">_LSHParams</span><span class="p">,</span> | 
|  | <span class="n">_BucketedRandomProjectionLSHParams</span><span class="p">,</span> | 
|  | <span class="n">HasSeed</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"BucketedRandomProjectionLSH"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    LSH class for Euclidean distance metrics.</span> | 
|  | <span class="sd">    The input is dense or sparse vectors, each of which represents a point in the Euclidean</span> | 
|  | <span class="sd">    distance space. The output will be vectors of configurable dimension. Hash values in the same</span> | 
|  | <span class="sd">    dimension are calculated by the same hash function.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.2.0</span> | 
|  |  | 
|  | <span class="sd">    Notes</span> | 
|  | <span class="sd">    -----</span> | 
|  |  | 
|  | <span class="sd">    - `Stable Distributions in Wikipedia article on Locality-sensitive hashing \</span> | 
|  | <span class="sd">      &lt;https://en.wikipedia.org/wiki/Locality-sensitive_hashing#Stable_distributions&gt;`_</span> | 
|  | <span class="sd">    - `Hashing for Similarity Search: A Survey &lt;https://arxiv.org/abs/1408.2927&gt;`_</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> from pyspark.sql.functions import col</span> | 
|  | <span class="sd">    >>> data = [(0, Vectors.dense([-1.0, -1.0 ]),),</span> | 
|  | <span class="sd">    ...         (1, Vectors.dense([-1.0, 1.0 ]),),</span> | 
|  | <span class="sd">    ...         (2, Vectors.dense([1.0, -1.0 ]),),</span> | 
|  | <span class="sd">    ...         (3, Vectors.dense([1.0, 1.0]),)]</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(data, ["id", "features"])</span> | 
|  | <span class="sd">    >>> brp = BucketedRandomProjectionLSH()</span> | 
|  | <span class="sd">    >>> brp.setInputCol("features")</span> | 
|  | <span class="sd">    BucketedRandomProjectionLSH...</span> | 
|  | <span class="sd">    >>> brp.setOutputCol("hashes")</span> | 
|  | <span class="sd">    BucketedRandomProjectionLSH...</span> | 
|  | <span class="sd">    >>> brp.setSeed(12345)</span> | 
|  | <span class="sd">    BucketedRandomProjectionLSH...</span> | 
|  | <span class="sd">    >>> brp.setBucketLength(1.0)</span> | 
|  | <span class="sd">    BucketedRandomProjectionLSH...</span> | 
|  | <span class="sd">    >>> model = brp.fit(df)</span> | 
|  | <span class="sd">    >>> model.getBucketLength()</span> | 
|  | <span class="sd">    1.0</span> | 
|  | <span class="sd">    >>> model.setOutputCol("hashes")</span> | 
|  | <span class="sd">    BucketedRandomProjectionLSHModel...</span> | 
|  | <span class="sd">    >>> model.transform(df).head()</span> | 
|  | <span class="sd">    Row(id=0, features=DenseVector([-1.0, -1.0]), hashes=[DenseVector([-1.0])])</span> | 
|  | <span class="sd">    >>> data2 = [(4, Vectors.dense([2.0, 2.0 ]),),</span> | 
|  | <span class="sd">    ...          (5, Vectors.dense([2.0, 3.0 ]),),</span> | 
|  | <span class="sd">    ...          (6, Vectors.dense([3.0, 2.0 ]),),</span> | 
|  | <span class="sd">    ...          (7, Vectors.dense([3.0, 3.0]),)]</span> | 
|  | <span class="sd">    >>> df2 = spark.createDataFrame(data2, ["id", "features"])</span> | 
|  | <span class="sd">    >>> model.approxNearestNeighbors(df2, Vectors.dense([1.0, 2.0]), 1).collect()</span> | 
|  | <span class="sd">    [Row(id=4, features=DenseVector([2.0, 2.0]), hashes=[DenseVector([1.0])], distCol=1.0)]</span> | 
|  | <span class="sd">    >>> model.approxSimilarityJoin(df, df2, 3.0, distCol="EuclideanDistance").select(</span> | 
|  | <span class="sd">    ...     col("datasetA.id").alias("idA"),</span> | 
|  | <span class="sd">    ...     col("datasetB.id").alias("idB"),</span> | 
|  | <span class="sd">    ...     col("EuclideanDistance")).show()</span> | 
|  | <span class="sd">    +---+---+-----------------+</span> | 
|  | <span class="sd">    |idA|idB|EuclideanDistance|</span> | 
|  | <span class="sd">    +---+---+-----------------+</span> | 
|  | <span class="sd">    |  3|  6| 2.23606797749979|</span> | 
|  | <span class="sd">    +---+---+-----------------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> model.approxSimilarityJoin(df, df2, 3, distCol="EuclideanDistance").select(</span> | 
|  | <span class="sd">    ...     col("datasetA.id").alias("idA"),</span> | 
|  | <span class="sd">    ...     col("datasetB.id").alias("idB"),</span> | 
|  | <span class="sd">    ...     col("EuclideanDistance")).show()</span> | 
|  | <span class="sd">    +---+---+-----------------+</span> | 
|  | <span class="sd">    |idA|idB|EuclideanDistance|</span> | 
|  | <span class="sd">    +---+---+-----------------+</span> | 
|  | <span class="sd">    |  3|  6| 2.23606797749979|</span> | 
|  | <span class="sd">    +---+---+-----------------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> brpPath = temp_path + "/brp"</span> | 
|  | <span class="sd">    >>> brp.save(brpPath)</span> | 
|  | <span class="sd">    >>> brp2 = BucketedRandomProjectionLSH.load(brpPath)</span> | 
|  | <span class="sd">    >>> brp2.getBucketLength() == brp.getBucketLength()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/brp-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> model2 = BucketedRandomProjectionLSHModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> model.transform(df).head().hashes == model2.transform(df).head().hashes</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">numHashTables</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">bucketLength</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, inputCol=None, outputCol=None, seed=None, numHashTables=1, \</span> | 
|  | <span class="sd">                 bucketLength=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">BucketedRandomProjectionLSH</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> | 
|  | <span class="s2">"org.apache.spark.ml.feature.BucketedRandomProjectionLSH"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="BucketedRandomProjectionLSH.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.BucketedRandomProjectionLSH.html#pyspark.ml.feature.BucketedRandomProjectionLSH.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">numHashTables</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">bucketLength</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"BucketedRandomProjectionLSH"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, inputCol=None, outputCol=None, seed=None, numHashTables=1, \</span> | 
|  | <span class="sd">                  bucketLength=None)</span> | 
|  | <span class="sd">        Sets params for this BucketedRandomProjectionLSH.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="BucketedRandomProjectionLSH.setBucketLength"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.BucketedRandomProjectionLSH.html#pyspark.ml.feature.BucketedRandomProjectionLSH.setBucketLength">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setBucketLength</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BucketedRandomProjectionLSH"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`bucketLength`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">bucketLength</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="BucketedRandomProjectionLSH.setSeed"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.BucketedRandomProjectionLSH.html#pyspark.ml.feature.BucketedRandomProjectionLSH.setSeed">[docs]</a>    <span class="k">def</span> <span class="nf">setSeed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BucketedRandomProjectionLSH"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`seed`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"BucketedRandomProjectionLSHModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">BucketedRandomProjectionLSHModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="BucketedRandomProjectionLSHModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.BucketedRandomProjectionLSHModel.html#pyspark.ml.feature.BucketedRandomProjectionLSHModel">[docs]</a><span class="k">class</span> <span class="nc">BucketedRandomProjectionLSHModel</span><span class="p">(</span> | 
|  | <span class="n">_LSHModel</span><span class="p">,</span> | 
|  | <span class="n">_BucketedRandomProjectionLSHParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"BucketedRandomProjectionLSHModel"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sa">r</span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`BucketedRandomProjectionLSH`, where multiple random vectors are</span> | 
|  | <span class="sd">    stored. The vectors are normalized to be unit vectors and each vector is used in a hash</span> | 
|  | <span class="sd">    function: :math:`h_i(x) = floor(r_i \cdot x / bucketLength)` where :math:`r_i` is the</span> | 
|  | <span class="sd">    i-th random unit vector. The number of buckets will be `(max L2 norm of input vectors) /</span> | 
|  | <span class="sd">    bucketLength`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.2.0</span> | 
|  | <span class="sd">    """</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="Bucketizer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Bucketizer.html#pyspark.ml.feature.Bucketizer">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">Bucketizer</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">HasInputCols</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCols</span><span class="p">,</span> | 
|  | <span class="n">HasHandleInvalid</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"Bucketizer"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Maps a column of continuous features to a column of feature buckets. Since 3.0.0,</span> | 
|  | <span class="sd">    :py:class:`Bucketizer` can map multiple columns at once by setting the :py:attr:`inputCols`</span> | 
|  | <span class="sd">    parameter. Note that when both the :py:attr:`inputCol` and :py:attr:`inputCols` parameters</span> | 
|  | <span class="sd">    are set, an Exception will be thrown. The :py:attr:`splits` parameter is only used for single</span> | 
|  | <span class="sd">    column usage, and :py:attr:`splitsArray` is for multiple columns.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> values = [(0.1, 0.0), (0.4, 1.0), (1.2, 1.3), (1.5, float("nan")),</span> | 
|  | <span class="sd">    ...     (float("nan"), 1.0), (float("nan"), 0.0)]</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(values, ["values1", "values2"])</span> | 
|  | <span class="sd">    >>> bucketizer = Bucketizer()</span> | 
|  | <span class="sd">    >>> bucketizer.setSplits([-float("inf"), 0.5, 1.4, float("inf")])</span> | 
|  | <span class="sd">    Bucketizer...</span> | 
|  | <span class="sd">    >>> bucketizer.setInputCol("values1")</span> | 
|  | <span class="sd">    Bucketizer...</span> | 
|  | <span class="sd">    >>> bucketizer.setOutputCol("buckets")</span> | 
|  | <span class="sd">    Bucketizer...</span> | 
|  | <span class="sd">    >>> bucketed = bucketizer.setHandleInvalid("keep").transform(df).collect()</span> | 
|  | <span class="sd">    >>> bucketed = bucketizer.setHandleInvalid("keep").transform(df.select("values1"))</span> | 
|  | <span class="sd">    >>> bucketed.show(truncate=False)</span> | 
|  | <span class="sd">    +-------+-------+</span> | 
|  | <span class="sd">    |values1|buckets|</span> | 
|  | <span class="sd">    +-------+-------+</span> | 
|  | <span class="sd">    |0.1    |0.0    |</span> | 
|  | <span class="sd">    |0.4    |0.0    |</span> | 
|  | <span class="sd">    |1.2    |1.0    |</span> | 
|  | <span class="sd">    |1.5    |2.0    |</span> | 
|  | <span class="sd">    |NaN    |3.0    |</span> | 
|  | <span class="sd">    |NaN    |3.0    |</span> | 
|  | <span class="sd">    +-------+-------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> bucketizer.setParams(outputCol="b").transform(df).head().b</span> | 
|  | <span class="sd">    0.0</span> | 
|  | <span class="sd">    >>> bucketizerPath = temp_path + "/bucketizer"</span> | 
|  | <span class="sd">    >>> bucketizer.save(bucketizerPath)</span> | 
|  | <span class="sd">    >>> loadedBucketizer = Bucketizer.load(bucketizerPath)</span> | 
|  | <span class="sd">    >>> loadedBucketizer.getSplits() == bucketizer.getSplits()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedBucketizer.transform(df).take(1) == bucketizer.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> bucketed = bucketizer.setHandleInvalid("skip").transform(df).collect()</span> | 
|  | <span class="sd">    >>> len(bucketed)</span> | 
|  | <span class="sd">    4</span> | 
|  | <span class="sd">    >>> bucketizer2 = Bucketizer(splitsArray=</span> | 
|  | <span class="sd">    ...     [[-float("inf"), 0.5, 1.4, float("inf")], [-float("inf"), 0.5, float("inf")]],</span> | 
|  | <span class="sd">    ...     inputCols=["values1", "values2"], outputCols=["buckets1", "buckets2"])</span> | 
|  | <span class="sd">    >>> bucketed2 = bucketizer2.setHandleInvalid("keep").transform(df)</span> | 
|  | <span class="sd">    >>> bucketed2.show(truncate=False)</span> | 
|  | <span class="sd">    +-------+-------+--------+--------+</span> | 
|  | <span class="sd">    |values1|values2|buckets1|buckets2|</span> | 
|  | <span class="sd">    +-------+-------+--------+--------+</span> | 
|  | <span class="sd">    |0.1    |0.0    |0.0     |0.0     |</span> | 
|  | <span class="sd">    |0.4    |1.0    |0.0     |1.0     |</span> | 
|  | <span class="sd">    |1.2    |1.3    |1.0     |1.0     |</span> | 
|  | <span class="sd">    |1.5    |NaN    |2.0     |2.0     |</span> | 
|  | <span class="sd">    |NaN    |1.0    |3.0     |1.0     |</span> | 
|  | <span class="sd">    |NaN    |0.0    |3.0     |0.0     |</span> | 
|  | <span class="sd">    +-------+-------+--------+--------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">splits</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"splits"</span><span class="p">,</span> | 
|  | <span class="s2">"Split points for mapping continuous features into buckets. With n+1 splits, "</span> | 
|  | <span class="o">+</span> <span class="s2">"there are n buckets. A bucket defined by splits x,y holds values in the "</span> | 
|  | <span class="o">+</span> <span class="s2">"range [x,y) except the last bucket, which also includes y. The splits "</span> | 
|  | <span class="o">+</span> <span class="s2">"should be of length >= 3 and strictly increasing. Values at -inf, inf must be "</span> | 
|  | <span class="o">+</span> <span class="s2">"explicitly provided to cover all Double values; otherwise, values outside the "</span> | 
|  | <span class="o">+</span> <span class="s2">"splits specified will be treated as errors."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toListFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"handleInvalid"</span><span class="p">,</span> | 
|  | <span class="s2">"how to handle invalid entries "</span> | 
|  | <span class="s2">"containing NaN values. Values outside the splits will always be treated "</span> | 
|  | <span class="s2">"as errors. Options are 'skip' (filter out rows with invalid values), "</span> | 
|  | <span class="o">+</span> <span class="s2">"'error' (throw an error), or 'keep' (keep invalid values in a "</span> | 
|  | <span class="o">+</span> <span class="s2">"special additional bucket). Note that in the multiple column "</span> | 
|  | <span class="o">+</span> <span class="s2">"case, the invalid handling is applied to all columns. That said "</span> | 
|  | <span class="o">+</span> <span class="s2">"for 'error' it will throw an error if any invalids are found in "</span> | 
|  | <span class="o">+</span> <span class="s2">"any column, for 'skip' it will skip rows with any invalids in "</span> | 
|  | <span class="o">+</span> <span class="s2">"any columns, etc."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">splitsArray</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"splitsArray"</span><span class="p">,</span> | 
|  | <span class="s2">"The array of split points for mapping "</span> | 
|  | <span class="o">+</span> <span class="s2">"continuous features into buckets for multiple columns. For each input "</span> | 
|  | <span class="o">+</span> <span class="s2">"column, with n+1 splits, there are n buckets. A bucket defined by "</span> | 
|  | <span class="o">+</span> <span class="s2">"splits x,y holds values in the range [x,y) except the last bucket, "</span> | 
|  | <span class="o">+</span> <span class="s2">"which also includes y. The splits should be of length >= 3 and "</span> | 
|  | <span class="o">+</span> <span class="s2">"strictly increasing. Values at -inf, inf must be explicitly provided "</span> | 
|  | <span class="o">+</span> <span class="s2">"to cover all Double values; otherwise, values outside the splits "</span> | 
|  | <span class="o">+</span> <span class="s2">"specified will be treated as errors."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toListListFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">splits</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">splitsArray</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">splits</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="n">splitsArray</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, splits=None, inputCol=None, outputCol=None, handleInvalid="error", \</span> | 
|  | <span class="sd">                 splitsArray=None, inputCols=None, outputCols=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">Bucketizer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.Bucketizer"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="s2">"error"</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">splits</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Bucketizer"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">splitsArray</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Bucketizer"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <div class="viewcode-block" id="Bucketizer.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Bucketizer.html#pyspark.ml.feature.Bucketizer.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">splits</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="n">splitsArray</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Bucketizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, splits=None, inputCol=None, outputCol=None, handleInvalid="error", \</span> | 
|  | <span class="sd">                  splitsArray=None, inputCols=None, outputCols=None)</span> | 
|  | <span class="sd">        Sets params for this Bucketizer.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Bucketizer.setSplits"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Bucketizer.html#pyspark.ml.feature.Bucketizer.setSplits">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSplits</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Bucketizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`splits`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">splits</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Bucketizer.getSplits"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Bucketizer.html#pyspark.ml.feature.Bucketizer.getSplits">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getSplits</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of splits or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">splits</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Bucketizer.setSplitsArray"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Bucketizer.html#pyspark.ml.feature.Bucketizer.setSplitsArray">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSplitsArray</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]])</span> <span class="o">-></span> <span class="s2">"Bucketizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`splitsArray`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">splitsArray</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Bucketizer.getSplitsArray"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Bucketizer.html#pyspark.ml.feature.Bucketizer.getSplitsArray">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getSplitsArray</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the array of split points or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">splitsArray</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Bucketizer.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Bucketizer.html#pyspark.ml.feature.Bucketizer.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Bucketizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Bucketizer.setInputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Bucketizer.html#pyspark.ml.feature.Bucketizer.setInputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Bucketizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Bucketizer.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Bucketizer.html#pyspark.ml.feature.Bucketizer.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Bucketizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Bucketizer.setOutputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Bucketizer.html#pyspark.ml.feature.Bucketizer.setOutputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Bucketizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Bucketizer.setHandleInvalid"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Bucketizer.html#pyspark.ml.feature.Bucketizer.setHandleInvalid">[docs]</a>    <span class="k">def</span> <span class="nf">setHandleInvalid</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Bucketizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`handleInvalid`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_CountVectorizerParams</span><span class="p">(</span><span class="n">JavaParams</span><span class="p">,</span> <span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`CountVectorizer` and :py:class:`CountVectorizerModel`.</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">minTF</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"minTF"</span><span class="p">,</span> | 
|  | <span class="s2">"Filter to ignore rare words in"</span> | 
|  | <span class="o">+</span> <span class="s2">" a document. For each document, terms with frequency/count less than the given"</span> | 
|  | <span class="o">+</span> <span class="s2">" threshold are ignored. If this is an integer >= 1, then this specifies a count (of"</span> | 
|  | <span class="o">+</span> <span class="s2">" times the term must appear in the document); if this is a double in [0,1), then this "</span> | 
|  | <span class="o">+</span> <span class="s2">"specifies a fraction (out of the document's token count). Note that the parameter is "</span> | 
|  | <span class="o">+</span> <span class="s2">"only used in transform of CountVectorizerModel and does not affect fitting. Default 1.0"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">minDF</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"minDF"</span><span class="p">,</span> | 
|  | <span class="s2">"Specifies the minimum number of"</span> | 
|  | <span class="o">+</span> <span class="s2">" different documents a term must appear in to be included in the vocabulary."</span> | 
|  | <span class="o">+</span> <span class="s2">" If this is an integer >= 1, this specifies the number of documents the term must"</span> | 
|  | <span class="o">+</span> <span class="s2">" appear in; if this is a double in [0,1), then this specifies the fraction of documents."</span> | 
|  | <span class="o">+</span> <span class="s2">" Default 1.0"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">maxDF</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"maxDF"</span><span class="p">,</span> | 
|  | <span class="s2">"Specifies the maximum number of"</span> | 
|  | <span class="o">+</span> <span class="s2">" different documents a term could appear in to be included in the vocabulary."</span> | 
|  | <span class="o">+</span> <span class="s2">" A term that appears more than the threshold will be ignored. If this is an"</span> | 
|  | <span class="o">+</span> <span class="s2">" integer >= 1, this specifies the maximum number of documents the term could appear in;"</span> | 
|  | <span class="o">+</span> <span class="s2">" if this is a double in [0,1), then this specifies the maximum"</span> | 
|  | <span class="o">+</span> <span class="s2">" fraction of documents the term could appear in."</span> | 
|  | <span class="o">+</span> <span class="s2">" Default (2^63) - 1"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">vocabSize</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"vocabSize"</span><span class="p">,</span> | 
|  | <span class="s2">"max size of the vocabulary. Default 1 &lt;&lt; 18."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">binary</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"binary"</span><span class="p">,</span> | 
|  | <span class="s2">"Binary toggle to control the output vector values."</span> | 
|  | <span class="o">+</span> <span class="s2">" If True, all nonzero counts (after minTF filter applied) are set to 1. This is useful"</span> | 
|  | <span class="o">+</span> <span class="s2">" for discrete probabilistic models that model binary events rather than integer counts."</span> | 
|  | <span class="o">+</span> <span class="s2">" Default False"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_CountVectorizerParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">minTF</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span> <span class="n">minDF</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span> <span class="n">maxDF</span><span class="o">=</span><span class="mi">2</span><span class="o">**</span><span class="mi">63</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="n">vocabSize</span><span class="o">=</span><span class="mi">1</span> <span class="o">&lt;&lt;</span> <span class="mi">18</span><span class="p">,</span> <span class="n">binary</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getMinTF</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of minTF or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">minTF</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getMinDF</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of minDF or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">minDF</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getMaxDF</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of maxDF or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">maxDF</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getVocabSize</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of vocabSize or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vocabSize</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getBinary</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of binary or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">binary</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizer.html#pyspark.ml.feature.CountVectorizer">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">CountVectorizer</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"CountVectorizerModel"</span><span class="p">],</span> | 
|  | <span class="n">_CountVectorizerParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"CountVectorizer"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Extracts a vocabulary from document collections and generates a :py:attr:`CountVectorizerModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.6.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(</span> | 
|  | <span class="sd">    ...    [(0, ["a", "b", "c"]), (1, ["a", "b", "b", "c", "a"])],</span> | 
|  | <span class="sd">    ...    ["label", "raw"])</span> | 
|  | <span class="sd">    >>> cv = CountVectorizer()</span> | 
|  | <span class="sd">    >>> cv.setInputCol("raw")</span> | 
|  | <span class="sd">    CountVectorizer...</span> | 
|  | <span class="sd">    >>> cv.setOutputCol("vectors")</span> | 
|  | <span class="sd">    CountVectorizer...</span> | 
|  | <span class="sd">    >>> model = cv.fit(df)</span> | 
|  | <span class="sd">    >>> model.setInputCol("raw")</span> | 
|  | <span class="sd">    CountVectorizerModel...</span> | 
|  | <span class="sd">    >>> model.transform(df).show(truncate=False)</span> | 
|  | <span class="sd">    +-----+---------------+-------------------------+</span> | 
|  | <span class="sd">    |label|raw            |vectors                  |</span> | 
|  | <span class="sd">    +-----+---------------+-------------------------+</span> | 
|  | <span class="sd">    |0    |[a, b, c]      |(3,[0,1,2],[1.0,1.0,1.0])|</span> | 
|  | <span class="sd">    |1    |[a, b, b, c, a]|(3,[0,1,2],[2.0,2.0,1.0])|</span> | 
|  | <span class="sd">    +-----+---------------+-------------------------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> sorted(model.vocabulary) == ['a', 'b', 'c']</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> countVectorizerPath = temp_path + "/count-vectorizer"</span> | 
|  | <span class="sd">    >>> cv.save(countVectorizerPath)</span> | 
|  | <span class="sd">    >>> loadedCv = CountVectorizer.load(countVectorizerPath)</span> | 
|  | <span class="sd">    >>> loadedCv.getMinDF() == cv.getMinDF()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedCv.getMinTF() == cv.getMinTF()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedCv.getVocabSize() == cv.getVocabSize()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/count-vectorizer-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = CountVectorizerModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.vocabulary == model.vocabulary</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).take(1) == model.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> fromVocabModel = CountVectorizerModel.from_vocabulary(["a", "b", "c"],</span> | 
|  | <span class="sd">    ...     inputCol="raw", outputCol="vectors")</span> | 
|  | <span class="sd">    >>> fromVocabModel.transform(df).show(truncate=False)</span> | 
|  | <span class="sd">    +-----+---------------+-------------------------+</span> | 
|  | <span class="sd">    |label|raw            |vectors                  |</span> | 
|  | <span class="sd">    +-----+---------------+-------------------------+</span> | 
|  | <span class="sd">    |0    |[a, b, c]      |(3,[0,1,2],[1.0,1.0,1.0])|</span> | 
|  | <span class="sd">    |1    |[a, b, b, c, a]|(3,[0,1,2],[2.0,2.0,1.0])|</span> | 
|  | <span class="sd">    +-----+---------------+-------------------------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">minTF</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> | 
|  | <span class="n">minDF</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> | 
|  | <span class="n">maxDF</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mi">2</span><span class="o">**</span><span class="mi">63</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">vocabSize</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">&lt;&lt;</span> <span class="mi">18</span><span class="p">,</span> | 
|  | <span class="n">binary</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, minTF=1.0, minDF=1.0, maxDF=2 ** 63 - 1, vocabSize=1 &lt;&lt; 18,\</span> | 
|  | <span class="sd">                 binary=False, inputCol=None,outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">CountVectorizer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.CountVectorizer"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizer.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizer.html#pyspark.ml.feature.CountVectorizer.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">minTF</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> | 
|  | <span class="n">minDF</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> | 
|  | <span class="n">maxDF</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mi">2</span><span class="o">**</span><span class="mi">63</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">vocabSize</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">&lt;&lt;</span> <span class="mi">18</span><span class="p">,</span> | 
|  | <span class="n">binary</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, minTF=1.0, minDF=1.0, maxDF=2 ** 63 - 1, vocabSize=1 &lt;&lt; 18,\</span> | 
|  | <span class="sd">                  binary=False, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Set the params for the CountVectorizer</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizer.setMinTF"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizer.html#pyspark.ml.feature.CountVectorizer.setMinTF">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMinTF</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`minTF`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minTF</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizer.setMinDF"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizer.html#pyspark.ml.feature.CountVectorizer.setMinDF">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMinDF</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`minDF`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minDF</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizer.setMaxDF"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizer.html#pyspark.ml.feature.CountVectorizer.setMaxDF">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMaxDF</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`maxDF`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxDF</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizer.setVocabSize"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizer.html#pyspark.ml.feature.CountVectorizer.setVocabSize">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setVocabSize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`vocabSize`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">vocabSize</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizer.setBinary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizer.html#pyspark.ml.feature.CountVectorizer.setBinary">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setBinary</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`binary`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">binary</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizer.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizer.html#pyspark.ml.feature.CountVectorizer.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizer.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizer.html#pyspark.ml.feature.CountVectorizer.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizerModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">CountVectorizerModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizerModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizerModel.html#pyspark.ml.feature.CountVectorizerModel">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">CountVectorizerModel</span><span class="p">(</span> | 
|  | <span class="n">JavaModel</span><span class="p">,</span> <span class="n">_CountVectorizerParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"CountVectorizerModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`CountVectorizer`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.6.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizerModel.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizerModel.html#pyspark.ml.feature.CountVectorizerModel.setInputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizerModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizerModel.html#pyspark.ml.feature.CountVectorizerModel.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizerModel.from_vocabulary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizerModel.html#pyspark.ml.feature.CountVectorizerModel.from_vocabulary">[docs]</a>    <span class="nd">@classmethod</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">from_vocabulary</span><span class="p">(</span> | 
|  | <span class="bp">cls</span><span class="p">,</span> | 
|  | <span class="n">vocabulary</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">minTF</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">binary</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Construct the model directly from a vocabulary list of strings.</span> | 
|  | <span class="sd">        Requires an active SparkContext.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.core.context</span> <span class="kn">import</span> <span class="n">SparkContext</span> | 
|  |  | 
|  | <span class="n">sc</span> <span class="o">=</span> <span class="n">SparkContext</span><span class="o">.</span><span class="n">_active_spark_context</span> | 
|  | <span class="k">assert</span> <span class="n">sc</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">sc</span><span class="o">.</span><span class="n">_gateway</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> | 
|  | <span class="n">java_class</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_gateway</span><span class="o">.</span><span class="n">jvm</span><span class="o">.</span><span class="n">java</span><span class="o">.</span><span class="n">lang</span><span class="o">.</span><span class="n">String</span> | 
|  | <span class="n">jvocab</span> <span class="o">=</span> <span class="n">CountVectorizerModel</span><span class="o">.</span><span class="n">_new_java_array</span><span class="p">(</span><span class="n">vocabulary</span><span class="p">,</span> <span class="n">java_class</span><span class="p">)</span> | 
|  | <span class="n">model</span> <span class="o">=</span> <span class="n">CountVectorizerModel</span><span class="o">.</span><span class="n">_create_from_java_class</span><span class="p">(</span> | 
|  | <span class="s2">"org.apache.spark.ml.feature.CountVectorizerModel"</span><span class="p">,</span> <span class="n">jvocab</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">model</span><span class="o">.</span><span class="n">setInputCol</span><span class="p">(</span><span class="n">inputCol</span><span class="p">)</span> | 
|  | <span class="k">if</span> <span class="n">outputCol</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">model</span><span class="o">.</span><span class="n">setOutputCol</span><span class="p">(</span><span class="n">outputCol</span><span class="p">)</span> | 
|  | <span class="k">if</span> <span class="n">minTF</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">model</span><span class="o">.</span><span class="n">setMinTF</span><span class="p">(</span><span class="n">minTF</span><span class="p">)</span> | 
|  | <span class="k">if</span> <span class="n">binary</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">model</span><span class="o">.</span><span class="n">setBinary</span><span class="p">(</span><span class="n">binary</span><span class="p">)</span> | 
|  | <span class="n">model</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">vocabSize</span><span class="o">=</span><span class="nb">len</span><span class="p">(</span><span class="n">vocabulary</span><span class="p">))</span> | 
|  | <span class="k">return</span> <span class="n">model</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">vocabulary</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        An array of terms in the vocabulary.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"vocabulary"</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizerModel.setMinTF"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizerModel.html#pyspark.ml.feature.CountVectorizerModel.setMinTF">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMinTF</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`minTF`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minTF</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="CountVectorizerModel.setBinary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.CountVectorizerModel.html#pyspark.ml.feature.CountVectorizerModel.setBinary">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setBinary</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"CountVectorizerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`binary`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">binary</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="DCT"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.DCT.html#pyspark.ml.feature.DCT">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">DCT</span><span class="p">(</span><span class="n">JavaTransformer</span><span class="p">,</span> <span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"DCT"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    A feature transformer that takes the 1D discrete cosine transform</span> | 
|  | <span class="sd">    of a real vector. No zero padding is performed on the input vector.</span> | 
|  | <span class="sd">    It returns a real vector of the same length representing the DCT.</span> | 
|  | <span class="sd">    The return vector is scaled such that the transform matrix is</span> | 
|  | <span class="sd">    unitary (aka scaled DCT-II).</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.6.0</span> | 
|  |  | 
|  | <span class="sd">    Notes</span> | 
|  | <span class="sd">    -----</span> | 
|  | <span class="sd">    `More information on Wikipedia \</span> | 
|  | <span class="sd">      <https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II>`_.</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> df1 = spark.createDataFrame([(Vectors.dense([5.0, 8.0, 6.0]),)], ["vec"])</span> | 
|  | <span class="sd">    >>> dct = DCT()</span> | 
|  | <span class="sd">    >>> dct.setInverse(False)</span> | 
|  | <span class="sd">    DCT...</span> | 
|  | <span class="sd">    >>> dct.setInputCol("vec")</span> | 
|  | <span class="sd">    DCT...</span> | 
|  | <span class="sd">    >>> dct.setOutputCol("resultVec")</span> | 
|  | <span class="sd">    DCT...</span> | 
|  | <span class="sd">    >>> df2 = dct.transform(df1)</span> | 
|  | <span class="sd">    >>> df2.head().resultVec</span> | 
|  | <span class="sd">    DenseVector([10.969..., -0.707..., -2.041...])</span> | 
|  | <span class="sd">    >>> df3 = DCT(inverse=True, inputCol="resultVec", outputCol="origVec").transform(df2)</span> | 
|  | <span class="sd">    >>> df3.head().origVec</span> | 
|  | <span class="sd">    DenseVector([5.0, 8.0, 6.0])</span> | 
|  | <span class="sd">    >>> dctPath = temp_path + "/dct"</span> | 
|  | <span class="sd">    >>> dct.save(dctPath)</span> | 
|  | <span class="sd">    >>> loadedDct = DCT.load(dctPath)</span> | 
|  | <span class="sd">    >>> loadedDct.transform(df1).take(1) == dct.transform(df1).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedDct.getInverse()</span> | 
|  | <span class="sd">    False</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">inverse</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"inverse"</span><span class="p">,</span> | 
|  | <span class="s2">"Set transformer to perform inverse DCT, "</span> <span class="o">+</span> <span class="s2">"default False."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inverse</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, inverse=False, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">DCT</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.DCT"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">inverse</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="DCT.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.DCT.html#pyspark.ml.feature.DCT.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inverse</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"DCT"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, inverse=False, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Sets params for this DCT.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="DCT.setInverse"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.DCT.html#pyspark.ml.feature.DCT.setInverse">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInverse</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DCT"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inverse`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inverse</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="DCT.getInverse"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.DCT.html#pyspark.ml.feature.DCT.getInverse">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getInverse</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of inverse or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">inverse</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="DCT.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.DCT.html#pyspark.ml.feature.DCT.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DCT"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="DCT.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.DCT.html#pyspark.ml.feature.DCT.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DCT"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="ElementwiseProduct"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ElementwiseProduct.html#pyspark.ml.feature.ElementwiseProduct">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">ElementwiseProduct</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"ElementwiseProduct"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Outputs the Hadamard product (i.e., the element-wise product) of each input vector</span> | 
|  | <span class="sd">    with a provided "weight" vector. In other words, it scales each column of the dataset</span> | 
|  | <span class="sd">    by a scalar multiplier.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.5.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(Vectors.dense([2.0, 1.0, 3.0]),)], ["values"])</span> | 
|  | <span class="sd">    >>> ep = ElementwiseProduct()</span> | 
|  | <span class="sd">    >>> ep.setScalingVec(Vectors.dense([1.0, 2.0, 3.0]))</span> | 
|  | <span class="sd">    ElementwiseProduct...</span> | 
|  | <span class="sd">    >>> ep.setInputCol("values")</span> | 
|  | <span class="sd">    ElementwiseProduct...</span> | 
|  | <span class="sd">    >>> ep.setOutputCol("eprod")</span> | 
|  | <span class="sd">    ElementwiseProduct...</span> | 
|  | <span class="sd">    >>> ep.transform(df).head().eprod</span> | 
|  | <span class="sd">    DenseVector([2.0, 2.0, 9.0])</span> | 
|  | <span class="sd">    >>> ep.setParams(scalingVec=Vectors.dense([2.0, 3.0, 5.0])).transform(df).head().eprod</span> | 
|  | <span class="sd">    DenseVector([4.0, 3.0, 15.0])</span> | 
|  | <span class="sd">    >>> elementwiseProductPath = temp_path + "/elementwise-product"</span> | 
|  | <span class="sd">    >>> ep.save(elementwiseProductPath)</span> | 
|  | <span class="sd">    >>> loadedEp = ElementwiseProduct.load(elementwiseProductPath)</span> | 
|  | <span class="sd">    >>> loadedEp.getScalingVec() == ep.getScalingVec()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedEp.transform(df).take(1) == ep.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">scalingVec</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="n">Vector</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"scalingVec"</span><span class="p">,</span> | 
|  | <span class="s2">"Vector for hadamard product."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toVector</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">scalingVec</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Vector</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, scalingVec=None, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">ElementwiseProduct</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> | 
|  | <span class="s2">"org.apache.spark.ml.feature.ElementwiseProduct"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="ElementwiseProduct.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ElementwiseProduct.html#pyspark.ml.feature.ElementwiseProduct.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">scalingVec</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Vector</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"ElementwiseProduct"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, scalingVec=None, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Sets params for this ElementwiseProduct.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="ElementwiseProduct.setScalingVec"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ElementwiseProduct.html#pyspark.ml.feature.ElementwiseProduct.setScalingVec">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setScalingVec</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">Vector</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"ElementwiseProduct"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`scalingVec`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">scalingVec</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="ElementwiseProduct.getScalingVec"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ElementwiseProduct.html#pyspark.ml.feature.ElementwiseProduct.getScalingVec">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getScalingVec</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of scalingVec or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">scalingVec</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="ElementwiseProduct.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ElementwiseProduct.html#pyspark.ml.feature.ElementwiseProduct.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"ElementwiseProduct"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="ElementwiseProduct.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ElementwiseProduct.html#pyspark.ml.feature.ElementwiseProduct.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"ElementwiseProduct"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="FeatureHasher"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.FeatureHasher.html#pyspark.ml.feature.FeatureHasher">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">FeatureHasher</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCols</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">HasNumFeatures</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"FeatureHasher"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Feature hashing projects a set of categorical or numerical features into a feature vector of</span> | 
|  | <span class="sd">    specified dimension (typically substantially smaller than that of the original feature</span> | 
|  | <span class="sd">    space). This is done using the hashing trick (https://en.wikipedia.org/wiki/Feature_hashing)</span> | 
|  | <span class="sd">    to map features to indices in the feature vector.</span> | 
|  |  | 
|  | <span class="sd">    The FeatureHasher transformer operates on multiple columns. Each column may contain either</span> | 
|  | <span class="sd">    numeric or categorical features. Behavior and handling of column data types is as follows:</span> | 
|  |  | 
|  | <span class="sd">    * Numeric columns:</span> | 
|  | <span class="sd">        For numeric features, the hash value of the column name is used to map the</span> | 
|  | <span class="sd">        feature value to its index in the feature vector. By default, numeric features</span> | 
|  | <span class="sd">        are not treated as categorical (even when they are integers). To treat them</span> | 
|  | <span class="sd">        as categorical, specify the relevant columns in `categoricalCols`.</span> | 
|  |  | 
|  | <span class="sd">    * String columns:</span> | 
|  | <span class="sd">        For categorical features, the hash value of the string "column_name=value"</span> | 
|  | <span class="sd">        is used to map to the vector index, with an indicator value of `1.0`.</span> | 
|  | <span class="sd">        Thus, categorical features are "one-hot" encoded</span> | 
|  | <span class="sd">        (similarly to using :py:class:`OneHotEncoder` with `dropLast=false`).</span> | 
|  |  | 
|  | <span class="sd">    * Boolean columns:</span> | 
|  | <span class="sd">        Boolean values are treated in the same way as string columns. That is,</span> | 
|  | <span class="sd">        boolean features are represented as "column_name=true" or "column_name=false",</span> | 
|  | <span class="sd">        with an indicator value of `1.0`.</span> | 
|  |  | 
|  | <span class="sd">    Null (missing) values are ignored (implicitly zero in the resulting feature vector).</span> | 
|  |  | 
|  | <span class="sd">    Since a simple modulo is used to map the hash value to a vector index,</span> | 
|  | <span class="sd">    it is advisable to use a power of two as the `numFeatures` parameter;</span> | 
|  | <span class="sd">    otherwise the features will not be mapped evenly to the vector indices.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.3.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> data = [(2.0, True, "1", "foo"), (3.0, False, "2", "bar")]</span> | 
|  | <span class="sd">    >>> cols = ["real", "bool", "stringNum", "string"]</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(data, cols)</span> | 
|  | <span class="sd">    >>> hasher = FeatureHasher()</span> | 
|  | <span class="sd">    >>> hasher.setInputCols(cols)</span> | 
|  | <span class="sd">    FeatureHasher...</span> | 
|  | <span class="sd">    >>> hasher.setOutputCol("features")</span> | 
|  | <span class="sd">    FeatureHasher...</span> | 
|  | <span class="sd">    >>> hasher.transform(df).head().features</span> | 
|  | <span class="sd">    SparseVector(262144, {174475: 2.0, 247670: 1.0, 257907: 1.0, 262126: 1.0})</span> | 
|  | <span class="sd">    >>> hasher.setCategoricalCols(["real"]).transform(df).head().features</span> | 
|  | <span class="sd">    SparseVector(262144, {171257: 1.0, 247670: 1.0, 257907: 1.0, 262126: 1.0})</span> | 
|  | <span class="sd">    >>> hasherPath = temp_path + "/hasher"</span> | 
|  | <span class="sd">    >>> hasher.save(hasherPath)</span> | 
|  | <span class="sd">    >>> loadedHasher = FeatureHasher.load(hasherPath)</span> | 
|  | <span class="sd">    >>> loadedHasher.getNumFeatures() == hasher.getNumFeatures()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedHasher.transform(df).head().features == hasher.transform(df).head().features</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">categoricalCols</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"categoricalCols"</span><span class="p">,</span> | 
|  | <span class="s2">"numeric columns to treat as categorical"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toListString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">numFeatures</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span> <span class="o"><<</span> <span class="mi">18</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">categoricalCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, numFeatures=1 << 18, inputCols=None, outputCol=None, \</span> | 
|  | <span class="sd">                 categoricalCols=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">FeatureHasher</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.FeatureHasher"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">numFeatures</span><span class="o">=</span><span class="mi">1</span> <span class="o"><<</span> <span class="mi">18</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="FeatureHasher.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.FeatureHasher.html#pyspark.ml.feature.FeatureHasher.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">numFeatures</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span> <span class="o"><<</span> <span class="mi">18</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">categoricalCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"FeatureHasher"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, numFeatures=1 << 18, inputCols=None, outputCol=None, \</span> | 
|  | <span class="sd">                  categoricalCols=None)</span> | 
|  | <span class="sd">        Sets params for this FeatureHasher.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="FeatureHasher.setCategoricalCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.FeatureHasher.html#pyspark.ml.feature.FeatureHasher.setCategoricalCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setCategoricalCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"FeatureHasher"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`categoricalCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">categoricalCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="FeatureHasher.getCategoricalCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.FeatureHasher.html#pyspark.ml.feature.FeatureHasher.getCategoricalCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getCategoricalCols</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of categoricalCols or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">categoricalCols</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="FeatureHasher.setInputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.FeatureHasher.html#pyspark.ml.feature.FeatureHasher.setInputCols">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"FeatureHasher"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="FeatureHasher.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.FeatureHasher.html#pyspark.ml.feature.FeatureHasher.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FeatureHasher"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="FeatureHasher.setNumFeatures"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.FeatureHasher.html#pyspark.ml.feature.FeatureHasher.setNumFeatures">[docs]</a>    <span class="k">def</span> <span class="nf">setNumFeatures</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FeatureHasher"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`numFeatures`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">numFeatures</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="HashingTF"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.HashingTF.html#pyspark.ml.feature.HashingTF">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">HashingTF</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">HasNumFeatures</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"HashingTF"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Maps a sequence of terms to their term frequencies using the hashing trick.</span> | 
|  | <span class="sd">    Currently we use Austin Appleby's MurmurHash 3 algorithm (MurmurHash3_x86_32)</span> | 
|  | <span class="sd">    to calculate the hash code value for the term object.</span> | 
|  | <span class="sd">    Since a simple modulo is used to map the hash value to a column index,</span> | 
|  | <span class="sd">    it is advisable to use a power of two as the numFeatures parameter;</span> | 
|  | <span class="sd">    otherwise the features will not be mapped evenly to the columns.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.3.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(["a", "b", "c"],)], ["words"])</span> | 
|  | <span class="sd">    >>> hashingTF = HashingTF(inputCol="words", outputCol="features")</span> | 
|  | <span class="sd">    >>> hashingTF.setNumFeatures(10)</span> | 
|  | <span class="sd">    HashingTF...</span> | 
|  | <span class="sd">    >>> hashingTF.transform(df).head().features</span> | 
|  | <span class="sd">    SparseVector(10, {5: 1.0, 7: 1.0, 8: 1.0})</span> | 
|  | <span class="sd">    >>> hashingTF.setParams(outputCol="freqs").transform(df).head().freqs</span> | 
|  | <span class="sd">    SparseVector(10, {5: 1.0, 7: 1.0, 8: 1.0})</span> | 
|  | <span class="sd">    >>> params = {hashingTF.numFeatures: 5, hashingTF.outputCol: "vector"}</span> | 
|  | <span class="sd">    >>> hashingTF.transform(df, params).head().vector</span> | 
|  | <span class="sd">    SparseVector(5, {0: 1.0, 2: 1.0, 3: 1.0})</span> | 
|  | <span class="sd">    >>> hashingTFPath = temp_path + "/hashing-tf"</span> | 
|  | <span class="sd">    >>> hashingTF.save(hashingTFPath)</span> | 
|  | <span class="sd">    >>> loadedHashingTF = HashingTF.load(hashingTFPath)</span> | 
|  | <span class="sd">    >>> loadedHashingTF.getNumFeatures() == hashingTF.getNumFeatures()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedHashingTF.transform(df).take(1) == hashingTF.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> hashingTF.indexOf("b")</span> | 
|  | <span class="sd">    5</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">binary</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"binary"</span><span class="p">,</span> | 
|  | <span class="s2">"If True, all non zero counts are set to 1. "</span> | 
|  | <span class="o">+</span> <span class="s2">"This is useful for discrete probabilistic models that model binary events "</span> | 
|  | <span class="o">+</span> <span class="s2">"rather than integer counts. Default False."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">numFeatures</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span> <span class="o"><<</span> <span class="mi">18</span><span class="p">,</span> | 
|  | <span class="n">binary</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">HashingTF</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.HashingTF"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">numFeatures</span><span class="o">=</span><span class="mi">1</span> <span class="o"><<</span> <span class="mi">18</span><span class="p">,</span> <span class="n">binary</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="HashingTF.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.HashingTF.html#pyspark.ml.feature.HashingTF.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">numFeatures</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span> <span class="o"><<</span> <span class="mi">18</span><span class="p">,</span> | 
|  | <span class="n">binary</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"HashingTF"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Sets params for this HashingTF.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="HashingTF.setBinary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.HashingTF.html#pyspark.ml.feature.HashingTF.setBinary">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setBinary</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"HashingTF"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`binary`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">binary</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="HashingTF.getBinary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.HashingTF.html#pyspark.ml.feature.HashingTF.getBinary">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getBinary</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of binary or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">binary</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="HashingTF.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.HashingTF.html#pyspark.ml.feature.HashingTF.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"HashingTF"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="HashingTF.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.HashingTF.html#pyspark.ml.feature.HashingTF.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"HashingTF"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="HashingTF.setNumFeatures"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.HashingTF.html#pyspark.ml.feature.HashingTF.setNumFeatures">[docs]</a>    <span class="k">def</span> <span class="nf">setNumFeatures</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"HashingTF"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`numFeatures`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">numFeatures</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Listing of HashingTF.indexOf. The [docs] link points back to the #pyspark.ml.feature.HashingTF.indexOf entry of the API reference page. --><div class="viewcode-block" id="HashingTF.indexOf"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.HashingTF.html#pyspark.ml.feature.HashingTF.indexOf">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">indexOf</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">term</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Returns the index of the input term.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_transfer_params_to_java</span><span class="p">()</span> | 
|  | <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span><span class="o">.</span><span class="n">indexOf</span><span class="p">(</span><span class="n">term</span><span class="p">)</span></div></div><!-- The first closing div tag ends the indexOf wrapper; the second ends an enclosing wrapper opened earlier in the file (presumably the HashingTF class block; confirm against its opening tag). --> | 
|  |  | 
|  |  | 
|  | <!-- Listing of the _IDFParams class. It has no viewcode-block wrapper and no [docs] back-link, so it carries no anchor id of its own. --><span class="k">class</span> <span class="nc">_IDFParams</span><span class="p">(</span><span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`IDF` and :py:class:`IDFModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">minDocFreq</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"minDocFreq"</span><span class="p">,</span> | 
|  | <span class="s2">"minimum number of documents in which a term should appear for filtering"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getMinDocFreq</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of minDocFreq or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">minDocFreq</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_IDFParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">minDocFreq</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <!-- Listing of the IDF class. This outer wrapper (id "IDF") encloses the whole class and is closed after _create_model; setParams, setMinDocFreq, setInputCol and setOutputCol each have a nested wrapper with their own [docs] back-link. --><div class="viewcode-block" id="IDF"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IDF.html#pyspark.ml.feature.IDF">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">IDF</span><span class="p">(</span><span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"IDFModel"</span><span class="p">],</span> <span class="n">_IDFParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"IDF"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Compute the Inverse Document Frequency (IDF) given a collection of documents.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import DenseVector</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(DenseVector([1.0, 2.0]),),</span> | 
|  | <span class="sd">    ...     (DenseVector([0.0, 1.0]),), (DenseVector([3.0, 0.2]),)], ["tf"])</span> | 
|  | <span class="sd">    >>> idf = IDF(minDocFreq=3)</span> | 
|  | <span class="sd">    >>> idf.setInputCol("tf")</span> | 
|  | <span class="sd">    IDF...</span> | 
|  | <span class="sd">    >>> idf.setOutputCol("idf")</span> | 
|  | <span class="sd">    IDF...</span> | 
|  | <span class="sd">    >>> model = idf.fit(df)</span> | 
|  | <span class="sd">    >>> model.setOutputCol("idf")</span> | 
|  | <span class="sd">    IDFModel...</span> | 
|  | <span class="sd">    >>> model.getMinDocFreq()</span> | 
|  | <span class="sd">    3</span> | 
|  | <span class="sd">    >>> model.idf</span> | 
|  | <span class="sd">    DenseVector([0.0, 0.0])</span> | 
|  | <span class="sd">    >>> model.docFreq</span> | 
|  | <span class="sd">    [0, 3]</span> | 
|  | <span class="sd">    >>> model.numDocs == df.count()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> model.transform(df).head().idf</span> | 
|  | <span class="sd">    DenseVector([0.0, 0.0])</span> | 
|  | <span class="sd">    >>> idf.setParams(outputCol="freqs").fit(df).transform(df).collect()[1].freqs</span> | 
|  | <span class="sd">    DenseVector([0.0, 0.0])</span> | 
|  | <span class="sd">    >>> params = {idf.minDocFreq: 1, idf.outputCol: "vector"}</span> | 
|  | <span class="sd">    >>> idf.fit(df, params).transform(df).head().vector</span> | 
|  | <span class="sd">    DenseVector([0.2877, 0.0])</span> | 
|  | <span class="sd">    >>> idfPath = temp_path + "/idf"</span> | 
|  | <span class="sd">    >>> idf.save(idfPath)</span> | 
|  | <span class="sd">    >>> loadedIdf = IDF.load(idfPath)</span> | 
|  | <span class="sd">    >>> loadedIdf.getMinDocFreq() == idf.getMinDocFreq()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/idf-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = IDFModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).head().idf == model.transform(df).head().idf</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <!-- The __init__ listing below has no nested viewcode-block wrapper and no [docs] back-link. --><span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">minDocFreq</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, minDocFreq=0, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">IDF</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.IDF"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <!-- Nested wrapper: IDF.setParams, linked back to #pyspark.ml.feature.IDF.setParams. --><div class="viewcode-block" id="IDF.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IDF.html#pyspark.ml.feature.IDF.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">minDocFreq</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"IDF"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, minDocFreq=0, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Sets params for this IDF.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Nested wrapper: IDF.setMinDocFreq, linked back to #pyspark.ml.feature.IDF.setMinDocFreq. --><div class="viewcode-block" id="IDF.setMinDocFreq"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IDF.html#pyspark.ml.feature.IDF.setMinDocFreq">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMinDocFreq</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IDF"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`minDocFreq`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minDocFreq</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Nested wrapper: IDF.setInputCol, linked back to #pyspark.ml.feature.IDF.setInputCol. --><div class="viewcode-block" id="IDF.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IDF.html#pyspark.ml.feature.IDF.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IDF"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Nested wrapper: IDF.setOutputCol, linked back to #pyspark.ml.feature.IDF.setOutputCol. --><div class="viewcode-block" id="IDF.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IDF.html#pyspark.ml.feature.IDF.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IDF"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- The _create_model listing has no nested wrapper; the closing div tag on its last line ends the outer IDF wrapper. --><span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IDFModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">IDFModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <!-- Listing of the IDFModel class. This outer wrapper (id "IDFModel") is closed after numDocs; only setInputCol and setOutputCol have nested wrappers with [docs] back-links. --><div class="viewcode-block" id="IDFModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IDFModel.html#pyspark.ml.feature.IDFModel">[docs]</a><span class="k">class</span> <span class="nc">IDFModel</span><span class="p">(</span><span class="n">JavaModel</span><span class="p">,</span> <span class="n">_IDFParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"IDFModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`IDF`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <!-- Nested wrapper: IDFModel.setInputCol, linked back to #pyspark.ml.feature.IDFModel.setInputCol. --><div class="viewcode-block" id="IDFModel.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IDFModel.html#pyspark.ml.feature.IDFModel.setInputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IDFModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Nested wrapper: IDFModel.setOutputCol, linked back to #pyspark.ml.feature.IDFModel.setOutputCol. --><div class="viewcode-block" id="IDFModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IDFModel.html#pyspark.ml.feature.IDFModel.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IDFModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- The idf, docFreq and numDocs property listings below have no nested wrappers and no [docs] back-links. --><span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">idf</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Returns the IDF vector.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"idf"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">docFreq</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Returns the document frequency.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"docFreq"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">numDocs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Returns number of documents evaluated to compute idf</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"numDocs"</span><span class="p">)</span></div><!-- This closing div tag ends the outer IDFModel wrapper. --> | 
|  |  | 
|  |  | 
|  | <!-- Listing of the _ImputerParams class. It has no viewcode-block wrapper and no [docs] back-link, so it carries no anchor id of its own. --><span class="k">class</span> <span class="nc">_ImputerParams</span><span class="p">(</span><span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasInputCols</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">,</span> <span class="n">HasOutputCols</span><span class="p">,</span> <span class="n">HasRelativeError</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`Imputer` and :py:class:`ImputerModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">strategy</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"strategy"</span><span class="p">,</span> | 
|  | <span class="s2">"strategy for imputation. If mean, then replace missing values using the mean "</span> | 
|  | <span class="s2">"value of the feature. If median, then replace missing values using the "</span> | 
|  | <span class="s2">"median value of the feature. If mode, then replace missing using the most "</span> | 
|  | <span class="s2">"frequent value of the feature."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">missingValue</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"missingValue"</span><span class="p">,</span> | 
|  | <span class="s2">"The placeholder for the missing values. All occurrences of missingValue "</span> | 
|  | <span class="s2">"will be imputed."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_ImputerParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">strategy</span><span class="o">=</span><span class="s2">"mean"</span><span class="p">,</span> <span class="n">missingValue</span><span class="o">=</span><span class="nb">float</span><span class="p">(</span><span class="s2">"nan"</span><span class="p">),</span> <span class="n">relativeError</span><span class="o">=</span><span class="mf">0.001</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getStrategy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`strategy` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">strategy</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getMissingValue</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`missingValue` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">missingValue</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="Imputer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Imputer.html#pyspark.ml.feature.Imputer">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">Imputer</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"ImputerModel"</span><span class="p">],</span> <span class="n">_ImputerParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"Imputer"</span><span class="p">],</span> <span class="n">JavaMLWritable</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Imputation estimator for completing missing values, using the mean, median or mode</span> | 
|  | <span class="sd">    of the columns in which the missing values are located. The input columns should be of</span> | 
|  | <span class="sd">    numeric type. Currently Imputer does not support categorical features and</span> | 
|  | <span class="sd">    possibly creates incorrect values for a categorical feature.</span> | 
|  |  | 
|  | <span class="sd">    Note that the mean/median/mode value is computed after filtering out missing values.</span> | 
|  | <span class="sd">    All Null values in the input columns are treated as missing, and so are also imputed. For</span> | 
|  | <span class="sd">    computing median, :py:meth:`pyspark.sql.DataFrame.approxQuantile` is used with a</span> | 
|  | <span class="sd">    relative error of `0.001`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.2.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(1.0, float("nan")), (2.0, float("nan")), (float("nan"), 3.0),</span> | 
|  | <span class="sd">    ...                             (4.0, 4.0), (5.0, 5.0)], ["a", "b"])</span> | 
|  | <span class="sd">    >>> imputer = Imputer()</span> | 
|  | <span class="sd">    >>> imputer.setInputCols(["a", "b"])</span> | 
|  | <span class="sd">    Imputer...</span> | 
|  | <span class="sd">    >>> imputer.setOutputCols(["out_a", "out_b"])</span> | 
|  | <span class="sd">    Imputer...</span> | 
|  | <span class="sd">    >>> imputer.getRelativeError()</span> | 
|  | <span class="sd">    0.001</span> | 
|  | <span class="sd">    >>> model = imputer.fit(df)</span> | 
|  | <span class="sd">    >>> model.setInputCols(["a", "b"])</span> | 
|  | <span class="sd">    ImputerModel...</span> | 
|  | <span class="sd">    >>> model.getStrategy()</span> | 
|  | <span class="sd">    'mean'</span> | 
|  | <span class="sd">    >>> model.surrogateDF.show()</span> | 
|  | <span class="sd">    +---+---+</span> | 
|  | <span class="sd">    |  a|  b|</span> | 
|  | <span class="sd">    +---+---+</span> | 
|  | <span class="sd">    |3.0|4.0|</span> | 
|  | <span class="sd">    +---+---+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> model.transform(df).show()</span> | 
|  | <span class="sd">    +---+---+-----+-----+</span> | 
|  | <span class="sd">    |  a|  b|out_a|out_b|</span> | 
|  | <span class="sd">    +---+---+-----+-----+</span> | 
|  | <span class="sd">    |1.0|NaN|  1.0|  4.0|</span> | 
|  | <span class="sd">    |2.0|NaN|  2.0|  4.0|</span> | 
|  | <span class="sd">    |NaN|3.0|  3.0|  3.0|</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> imputer.setStrategy("median").setMissingValue(1.0).fit(df).transform(df).show()</span> | 
|  | <span class="sd">    +---+---+-----+-----+</span> | 
|  | <span class="sd">    |  a|  b|out_a|out_b|</span> | 
|  | <span class="sd">    +---+---+-----+-----+</span> | 
|  | <span class="sd">    |1.0|NaN|  4.0|  NaN|</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> df1 = spark.createDataFrame([(1.0,), (2.0,), (float("nan"),), (4.0,), (5.0,)], ["a"])</span> | 
|  | <span class="sd">    >>> imputer1 = Imputer(inputCol="a", outputCol="out_a")</span> | 
|  | <span class="sd">    >>> model1 = imputer1.fit(df1)</span> | 
|  | <span class="sd">    >>> model1.surrogateDF.show()</span> | 
|  | <span class="sd">    +---+</span> | 
|  | <span class="sd">    |  a|</span> | 
|  | <span class="sd">    +---+</span> | 
|  | <span class="sd">    |3.0|</span> | 
|  | <span class="sd">    +---+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> model1.transform(df1).show()</span> | 
|  | <span class="sd">    +---+-----+</span> | 
|  | <span class="sd">    |  a|out_a|</span> | 
|  | <span class="sd">    +---+-----+</span> | 
|  | <span class="sd">    |1.0|  1.0|</span> | 
|  | <span class="sd">    |2.0|  2.0|</span> | 
|  | <span class="sd">    |NaN|  3.0|</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> imputer1.setStrategy("median").setMissingValue(1.0).fit(df1).transform(df1).show()</span> | 
|  | <span class="sd">    +---+-----+</span> | 
|  | <span class="sd">    |  a|out_a|</span> | 
|  | <span class="sd">    +---+-----+</span> | 
|  | <span class="sd">    |1.0|  4.0|</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> df2 = spark.createDataFrame([(float("nan"),), (float("nan"),), (3.0,), (4.0,), (5.0,)],</span> | 
|  | <span class="sd">    ...                             ["b"])</span> | 
|  | <span class="sd">    >>> imputer2 = Imputer(inputCol="b", outputCol="out_b")</span> | 
|  | <span class="sd">    >>> model2 = imputer2.fit(df2)</span> | 
|  | <span class="sd">    >>> model2.surrogateDF.show()</span> | 
|  | <span class="sd">    +---+</span> | 
|  | <span class="sd">    |  b|</span> | 
|  | <span class="sd">    +---+</span> | 
|  | <span class="sd">    |4.0|</span> | 
|  | <span class="sd">    +---+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> model2.transform(df2).show()</span> | 
|  | <span class="sd">    +---+-----+</span> | 
|  | <span class="sd">    |  b|out_b|</span> | 
|  | <span class="sd">    +---+-----+</span> | 
|  | <span class="sd">    |NaN|  4.0|</span> | 
|  | <span class="sd">    |NaN|  4.0|</span> | 
|  | <span class="sd">    |3.0|  3.0|</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> imputer2.setStrategy("median").setMissingValue(1.0).fit(df2).transform(df2).show()</span> | 
|  | <span class="sd">    +---+-----+</span> | 
|  | <span class="sd">    |  b|out_b|</span> | 
|  | <span class="sd">    +---+-----+</span> | 
|  | <span class="sd">    |NaN|  NaN|</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> imputerPath = temp_path + "/imputer"</span> | 
|  | <span class="sd">    >>> imputer.save(imputerPath)</span> | 
|  | <span class="sd">    >>> loadedImputer = Imputer.load(imputerPath)</span> | 
|  | <span class="sd">    >>> loadedImputer.getStrategy() == imputer.getStrategy()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedImputer.getMissingValue()</span> | 
|  | <span class="sd">    1.0</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/imputer-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = ImputerModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).head().out_a == model.transform(df).head().out_a</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">strategy</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">missingValue</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">strategy</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">missingValue</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">strategy</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"mean"</span><span class="p">,</span> | 
|  | <span class="n">missingValue</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="s2">"nan"</span><span class="p">),</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.001</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, strategy="mean", missingValue=float("nan"), inputCols=None, \</span> | 
|  | <span class="sd">                 outputCols=None, inputCol=None, outputCol=None, relativeError=0.001):</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">Imputer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.Imputer"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">strategy</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">missingValue</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Imputer"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">strategy</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">missingValue</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Imputer"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <div class="viewcode-block" id="Imputer.setParams"><!-- Source listing for Imputer.setParams. The "[docs]" anchor links back to this member's API reference entry; its aria-label names the target so the link is distinguishable when read out of context. --><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Imputer.html#pyspark.ml.feature.Imputer.setParams" aria-label="Docs for Imputer.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">strategy</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"mean"</span><span class="p">,</span> | 
|  | <span class="n">missingValue</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="s2">"nan"</span><span class="p">),</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.001</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Imputer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, strategy="mean", missingValue=float("nan"), inputCols=None, \</span> | 
|  | <span class="sd">                  outputCols=None, inputCol=None, outputCol=None, relativeError=0.001)</span> | 
|  | <span class="sd">        Sets params for this Imputer.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Imputer.setStrategy"><!-- Source listing for Imputer.setStrategy. The "[docs]" anchor links back to this member's API reference entry; its aria-label names the target so the link is distinguishable when read out of context. --><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Imputer.html#pyspark.ml.feature.Imputer.setStrategy" aria-label="Docs for Imputer.setStrategy">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setStrategy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Imputer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`strategy`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">strategy</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Imputer.setMissingValue"><!-- Source listing for Imputer.setMissingValue. The "[docs]" anchor links back to this member's API reference entry; its aria-label names the target so the link is distinguishable when read out of context. --><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Imputer.html#pyspark.ml.feature.Imputer.setMissingValue" aria-label="Docs for Imputer.setMissingValue">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMissingValue</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Imputer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`missingValue`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">missingValue</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Imputer.setInputCols"><!-- Source listing for Imputer.setInputCols. The "[docs]" anchor links back to this member's API reference entry; its aria-label names the target so the link is distinguishable when read out of context. --><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Imputer.html#pyspark.ml.feature.Imputer.setInputCols" aria-label="Docs for Imputer.setInputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Imputer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Imputer.setOutputCols"><!-- Source listing for Imputer.setOutputCols. The "[docs]" anchor links back to this member's API reference entry; its aria-label names the target so the link is distinguishable when read out of context. --><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Imputer.html#pyspark.ml.feature.Imputer.setOutputCols" aria-label="Docs for Imputer.setOutputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Imputer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Imputer.setInputCol"><!-- Source listing for Imputer.setInputCol. The "[docs]" anchor links back to this member's API reference entry; its aria-label names the target so the link is distinguishable when read out of context. --><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Imputer.html#pyspark.ml.feature.Imputer.setInputCol" aria-label="Docs for Imputer.setInputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Imputer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Imputer.setOutputCol"><!-- Source listing for Imputer.setOutputCol. The "[docs]" anchor links back to this member's API reference entry; its aria-label names the target so the link is distinguishable when read out of context. --><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Imputer.html#pyspark.ml.feature.Imputer.setOutputCol" aria-label="Docs for Imputer.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Imputer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Imputer.setRelativeError"><!-- Source listing for Imputer.setRelativeError. The "[docs]" anchor links back to this member's API reference entry; its aria-label names the target so the link is distinguishable when read out of context. --><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Imputer.html#pyspark.ml.feature.Imputer.setRelativeError" aria-label="Docs for Imputer.setRelativeError">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setRelativeError</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Imputer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`relativeError`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">relativeError</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"ImputerModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">ImputerModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="ImputerModel"><!-- Source listing for the ImputerModel class; nested viewcode-block divs wrap its individual members. Each "[docs]" anchor links back to the matching API reference entry and carries an aria-label naming its target, so the otherwise identical links are distinguishable when read out of context. --><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ImputerModel.html#pyspark.ml.feature.ImputerModel" aria-label="Docs for ImputerModel">[docs]</a><span class="k">class</span> <span class="nc">ImputerModel</span><span class="p">(</span><span class="n">JavaModel</span><span class="p">,</span> <span class="n">_ImputerParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"ImputerModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`Imputer`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.2.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="ImputerModel.setInputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ImputerModel.html#pyspark.ml.feature.ImputerModel.setInputCols" aria-label="Docs for ImputerModel.setInputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"ImputerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="ImputerModel.setOutputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ImputerModel.html#pyspark.ml.feature.ImputerModel.setOutputCols" aria-label="Docs for ImputerModel.setOutputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"ImputerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="ImputerModel.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ImputerModel.html#pyspark.ml.feature.ImputerModel.setInputCol" aria-label="Docs for ImputerModel.setInputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"ImputerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="ImputerModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ImputerModel.html#pyspark.ml.feature.ImputerModel.setOutputCol" aria-label="Docs for ImputerModel.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"ImputerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">surrogateDF</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Returns a DataFrame containing inputCols and their corresponding surrogates,</span> | 
|  | <span class="sd">        which are used to replace the missing values in the input DataFrame.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"surrogateDF"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="Interaction"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Interaction.html#pyspark.ml.feature.Interaction">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">Interaction</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCols</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"Interaction"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Implements the feature interaction transform. This transformer takes in Double and Vector type</span> | 
|  | <span class="sd">    columns and outputs a flattened vector of their feature interactions. To handle interaction,</span> | 
|  | <span class="sd">    we first one-hot encode any nominal features. Then, a vector of the feature cross-products is</span> | 
|  | <span class="sd">    produced.</span> | 
|  |  | 
|  | <span class="sd">    For example, given the input feature values `Double(2)` and `Vector(3, 4)`, the output would be</span> | 
|  | <span class="sd">    `Vector(6, 8)` if all input features were numeric. If the first feature was instead nominal</span> | 
|  | <span class="sd">    with four categories, the output would then be `Vector(0, 0, 0, 0, 3, 4, 0, 0)`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(0.0, 1.0), (2.0, 3.0)], ["a", "b"])</span> | 
|  | <span class="sd">    >>> interaction = Interaction()</span> | 
|  | <span class="sd">    >>> interaction.setInputCols(["a", "b"])</span> | 
|  | <span class="sd">    Interaction...</span> | 
|  | <span class="sd">    >>> interaction.setOutputCol("ab")</span> | 
|  | <span class="sd">    Interaction...</span> | 
|  | <span class="sd">    >>> interaction.transform(df).show()</span> | 
|  | <span class="sd">    +---+---+-----+</span> | 
|  | <span class="sd">    |  a|  b|   ab|</span> | 
|  | <span class="sd">    +---+---+-----+</span> | 
|  | <span class="sd">    |0.0|1.0|[0.0]|</span> | 
|  | <span class="sd">    |2.0|3.0|[6.0]|</span> | 
|  | <span class="sd">    +---+---+-----+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> interactionPath = temp_path + "/interaction"</span> | 
|  | <span class="sd">    >>> interaction.save(interactionPath)</span> | 
|  | <span class="sd">    >>> loadedInteraction = Interaction.load(interactionPath)</span> | 
|  | <span class="sd">    >>> loadedInteraction.transform(df).head().ab == interaction.transform(df).head().ab</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, inputCols=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">Interaction</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.Interaction"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">()</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="Interaction.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Interaction.html#pyspark.ml.feature.Interaction.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Interaction"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, inputCols=None, outputCol=None)</span> | 
|  | <span class="sd">        Sets params for this Interaction.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Interaction.setInputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Interaction.html#pyspark.ml.feature.Interaction.setInputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"Interaction"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Interaction.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Interaction.html#pyspark.ml.feature.Interaction.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Interaction"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_MaxAbsScalerParams</span><span class="p">(</span><span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`MaxAbsScaler` and :py:class:`MaxAbsScalerModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="k">pass</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="MaxAbsScaler"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MaxAbsScaler.html#pyspark.ml.feature.MaxAbsScaler">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">MaxAbsScaler</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"MaxAbsScalerModel"</span><span class="p">],</span> | 
|  | <span class="n">_MaxAbsScalerParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"MaxAbsScaler"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Rescale each feature individually to the range [-1, 1] by dividing by the largest absolute</span> | 
|  | <span class="sd">    value of that feature. It does not shift/center the data, and thus does not destroy</span> | 
|  | <span class="sd">    any sparsity.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.0.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(Vectors.dense([1.0]),), (Vectors.dense([2.0]),)], ["a"])</span> | 
|  | <span class="sd">    >>> maScaler = MaxAbsScaler(outputCol="scaled")</span> | 
|  | <span class="sd">    >>> maScaler.setInputCol("a")</span> | 
|  | <span class="sd">    MaxAbsScaler...</span> | 
|  | <span class="sd">    >>> model = maScaler.fit(df)</span> | 
|  | <span class="sd">    >>> model.setOutputCol("scaledOutput")</span> | 
|  | <span class="sd">    MaxAbsScalerModel...</span> | 
|  | <span class="sd">    >>> model.transform(df).show()</span> | 
|  | <span class="sd">    +-----+------------+</span> | 
|  | <span class="sd">    |    a|scaledOutput|</span> | 
|  | <span class="sd">    +-----+------------+</span> | 
|  | <span class="sd">    |[1.0]|       [0.5]|</span> | 
|  | <span class="sd">    |[2.0]|       [1.0]|</span> | 
|  | <span class="sd">    +-----+------------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> scalerPath = temp_path + "/max-abs-scaler"</span> | 
|  | <span class="sd">    >>> maScaler.save(scalerPath)</span> | 
|  | <span class="sd">    >>> loadedMAScaler = MaxAbsScaler.load(scalerPath)</span> | 
|  | <span class="sd">    >>> loadedMAScaler.getInputCol() == maScaler.getInputCol()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedMAScaler.getOutputCol() == maScaler.getOutputCol()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/max-abs-scaler-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = MaxAbsScalerModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.maxAbs == model.maxAbs</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).take(1) == model.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">MaxAbsScaler</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.MaxAbsScaler"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">()</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="MaxAbsScaler.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MaxAbsScaler.html#pyspark.ml.feature.MaxAbsScaler.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"MaxAbsScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Sets params for this MaxAbsScaler.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="MaxAbsScaler.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MaxAbsScaler.html#pyspark.ml.feature.MaxAbsScaler.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MaxAbsScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="MaxAbsScaler.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MaxAbsScaler.html#pyspark.ml.feature.MaxAbsScaler.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MaxAbsScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MaxAbsScalerModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">MaxAbsScalerModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="MaxAbsScalerModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MaxAbsScalerModel.html#pyspark.ml.feature.MaxAbsScalerModel">[docs]</a><span class="k">class</span> <span class="nc">MaxAbsScalerModel</span><span class="p">(</span> | 
|  | <span class="n">JavaModel</span><span class="p">,</span> <span class="n">_MaxAbsScalerParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"MaxAbsScalerModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`MaxAbsScaler`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="MaxAbsScalerModel.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MaxAbsScalerModel.html#pyspark.ml.feature.MaxAbsScalerModel.setInputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MaxAbsScalerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="MaxAbsScalerModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MaxAbsScalerModel.html#pyspark.ml.feature.MaxAbsScalerModel.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MaxAbsScalerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">maxAbs</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Max Abs vector.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"maxAbs"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="MinHashLSH"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinHashLSH.html#pyspark.ml.feature.MinHashLSH">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">MinHashLSH</span><span class="p">(</span> | 
|  | <span class="n">_LSH</span><span class="p">[</span><span class="s2">"MinHashLSHModel"</span><span class="p">],</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">HasSeed</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"MinHashLSH"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  |  | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    LSH class for Jaccard distance.</span> | 
|  | <span class="sd">    The input can be dense or sparse vectors, but it is more efficient if it is sparse.</span> | 
|  | <span class="sd">    For example, `Vectors.sparse(10, [(2, 1.0), (3, 1.0), (5, 1.0)])` means there are 10 elements</span> | 
|  | <span class="sd">    in the space. This set contains elements 2, 3, and 5. Also, any input vector must have at</span> | 
|  | <span class="sd">    least 1 non-zero index, and all non-zero values are treated as binary "1" values.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.2.0</span> | 
|  |  | 
|  | <span class="sd">    Notes</span> | 
|  | <span class="sd">    -----</span> | 
|  | <span class="sd">    See `Wikipedia on MinHash &lt;https://en.wikipedia.org/wiki/MinHash&gt;`_</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> from pyspark.sql.functions import col</span> | 
|  | <span class="sd">    >>> data = [(0, Vectors.sparse(6, [0, 1, 2], [1.0, 1.0, 1.0]),),</span> | 
|  | <span class="sd">    ...         (1, Vectors.sparse(6, [2, 3, 4], [1.0, 1.0, 1.0]),),</span> | 
|  | <span class="sd">    ...         (2, Vectors.sparse(6, [0, 2, 4], [1.0, 1.0, 1.0]),)]</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(data, ["id", "features"])</span> | 
|  | <span class="sd">    >>> mh = MinHashLSH()</span> | 
|  | <span class="sd">    >>> mh.setInputCol("features")</span> | 
|  | <span class="sd">    MinHashLSH...</span> | 
|  | <span class="sd">    >>> mh.setOutputCol("hashes")</span> | 
|  | <span class="sd">    MinHashLSH...</span> | 
|  | <span class="sd">    >>> mh.setSeed(12345)</span> | 
|  | <span class="sd">    MinHashLSH...</span> | 
|  | <span class="sd">    >>> model = mh.fit(df)</span> | 
|  | <span class="sd">    >>> model.setInputCol("features")</span> | 
|  | <span class="sd">    MinHashLSHModel...</span> | 
|  | <span class="sd">    >>> model.transform(df).head()</span> | 
|  | <span class="sd">    Row(id=0, features=SparseVector(6, {0: 1.0, 1: 1.0, 2: 1.0}), hashes=[DenseVector([6179668...</span> | 
|  | <span class="sd">    >>> data2 = [(3, Vectors.sparse(6, [1, 3, 5], [1.0, 1.0, 1.0]),),</span> | 
|  | <span class="sd">    ...          (4, Vectors.sparse(6, [2, 3, 5], [1.0, 1.0, 1.0]),),</span> | 
|  | <span class="sd">    ...          (5, Vectors.sparse(6, [1, 2, 4], [1.0, 1.0, 1.0]),)]</span> | 
|  | <span class="sd">    >>> df2 = spark.createDataFrame(data2, ["id", "features"])</span> | 
|  | <span class="sd">    >>> key = Vectors.sparse(6, [1, 2], [1.0, 1.0])</span> | 
|  | <span class="sd">    >>> model.approxNearestNeighbors(df2, key, 1).collect()</span> | 
|  | <span class="sd">    [Row(id=5, features=SparseVector(6, {1: 1.0, 2: 1.0, 4: 1.0}), hashes=[DenseVector([6179668...</span> | 
|  | <span class="sd">    >>> model.approxSimilarityJoin(df, df2, 0.6, distCol="JaccardDistance").select(</span> | 
|  | <span class="sd">    ...     col("datasetA.id").alias("idA"),</span> | 
|  | <span class="sd">    ...     col("datasetB.id").alias("idB"),</span> | 
|  | <span class="sd">    ...     col("JaccardDistance")).show()</span> | 
|  | <span class="sd">    +---+---+---------------+</span> | 
|  | <span class="sd">    |idA|idB|JaccardDistance|</span> | 
|  | <span class="sd">    +---+---+---------------+</span> | 
|  | <span class="sd">    |  0|  5|            0.5|</span> | 
|  | <span class="sd">    |  1|  4|            0.5|</span> | 
|  | <span class="sd">    +---+---+---------------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> mhPath = temp_path + "/mh"</span> | 
|  | <span class="sd">    >>> mh.save(mhPath)</span> | 
|  | <span class="sd">    >>> mh2 = MinHashLSH.load(mhPath)</span> | 
|  | <span class="sd">    >>> mh2.getOutputCol() == mh.getOutputCol()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/mh-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> model2 = MinHashLSHModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> model.transform(df).head().hashes == model2.transform(df).head().hashes</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">numHashTables</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, inputCol=None, outputCol=None, seed=None, numHashTables=1)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">MinHashLSH</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.MinHashLSH"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="MinHashLSH.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinHashLSH.html#pyspark.ml.feature.MinHashLSH.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">numHashTables</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"MinHashLSH"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, inputCol=None, outputCol=None, seed=None, numHashTables=1)</span> | 
|  | <span class="sd">        Sets params for this MinHashLSH.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="MinHashLSH.setSeed"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinHashLSH.html#pyspark.ml.feature.MinHashLSH.setSeed">[docs]</a>    <span class="k">def</span> <span class="nf">setSeed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MinHashLSH"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`seed`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MinHashLSHModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">MinHashLSHModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="MinHashLSHModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinHashLSHModel.html#pyspark.ml.feature.MinHashLSHModel">[docs]</a><span class="k">class</span> <span class="nc">MinHashLSHModel</span><span class="p">(</span><span class="n">_LSHModel</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">,</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sa">r</span><span class="sd">"""</span> | 
|  | <span class="sd">    Model produced by :py:class:`MinHashLSH`, where multiple hash functions are stored. Each</span> | 
|  | <span class="sd">    hash function is picked from the following family of hash functions, where :math:`a_i` and</span> | 
|  | <span class="sd">    :math:`b_i` are randomly chosen integers less than prime:</span> | 
|  | <span class="sd">    :math:`h_i(x) = ((x \cdot a_i + b_i) \mod prime)`. This hash family is approximately min-wise</span> | 
|  | <span class="sd">    independent according to the reference.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.2.0</span> | 
|  |  | 
|  | <span class="sd">    Notes</span> | 
|  | <span class="sd">    -----</span> | 
|  | <span class="sd">    See Tom Bohman, Colin Cooper, and Alan Frieze. "Min-wise independent linear permutations."</span> | 
|  | <span class="sd">    Electronic Journal of Combinatorics 7 (2000): R26.</span> | 
|  | <span class="sd">    """</span></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_MinMaxScalerParams</span><span class="p">(</span><span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`MinMaxScaler` and :py:class:`MinMaxScalerModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="nb">min</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"min"</span><span class="p">,</span> | 
|  | <span class="s2">"Lower bound of the output feature range"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="nb">max</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"max"</span><span class="p">,</span> | 
|  | <span class="s2">"Upper bound of the output feature range"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_MinMaxScalerParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="nb">min</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> <span class="nb">max</span><span class="o">=</span><span class="mf">1.0</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getMin</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of min or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">min</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getMax</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of max or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">max</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="MinMaxScaler"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinMaxScaler.html#pyspark.ml.feature.MinMaxScaler">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">MinMaxScaler</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"MinMaxScalerModel"</span><span class="p">],</span> | 
|  | <span class="n">_MinMaxScalerParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"MinMaxScaler"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Rescale each feature individually to a common range [min, max] linearly using column summary</span> | 
|  | <span class="sd">    statistics, which is also known as min-max normalization or Rescaling. The rescaled value for</span> | 
|  | <span class="sd">    feature E is calculated as,</span> | 
|  |  | 
|  | <span class="sd">    Rescaled(e_i) = (e_i - E_min) / (E_max - E_min) * (max - min) + min</span> | 
|  |  | 
|  | <span class="sd">    For the case E_max == E_min, Rescaled(e_i) = 0.5 * (max + min)</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.6.0</span> | 
|  |  | 
|  | <span class="sd">    Notes</span> | 
|  | <span class="sd">    -----</span> | 
|  | <span class="sd">    Since zero values will probably be transformed to non-zero values, output of the</span> | 
|  | <span class="sd">    transformer will be DenseVector even for sparse input.</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(Vectors.dense([0.0]),), (Vectors.dense([2.0]),)], ["a"])</span> | 
|  | <span class="sd">    >>> mmScaler = MinMaxScaler(outputCol="scaled")</span> | 
|  | <span class="sd">    >>> mmScaler.setInputCol("a")</span> | 
|  | <span class="sd">    MinMaxScaler...</span> | 
|  | <span class="sd">    >>> model = mmScaler.fit(df)</span> | 
|  | <span class="sd">    >>> model.setOutputCol("scaledOutput")</span> | 
|  | <span class="sd">    MinMaxScalerModel...</span> | 
|  | <span class="sd">    >>> model.originalMin</span> | 
|  | <span class="sd">    DenseVector([0.0])</span> | 
|  | <span class="sd">    >>> model.originalMax</span> | 
|  | <span class="sd">    DenseVector([2.0])</span> | 
|  | <span class="sd">    >>> model.transform(df).show()</span> | 
|  | <span class="sd">    +-----+------------+</span> | 
|  | <span class="sd">    |    a|scaledOutput|</span> | 
|  | <span class="sd">    +-----+------------+</span> | 
|  | <span class="sd">    |[0.0]|       [0.0]|</span> | 
|  | <span class="sd">    |[2.0]|       [1.0]|</span> | 
|  | <span class="sd">    +-----+------------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> minMaxScalerPath = temp_path + "/min-max-scaler"</span> | 
|  | <span class="sd">    >>> mmScaler.save(minMaxScalerPath)</span> | 
|  | <span class="sd">    >>> loadedMMScaler = MinMaxScaler.load(minMaxScalerPath)</span> | 
|  | <span class="sd">    >>> loadedMMScaler.getMin() == mmScaler.getMin()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedMMScaler.getMax() == mmScaler.getMax()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/min-max-scaler-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = MinMaxScalerModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.originalMin == model.originalMin</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.originalMax == model.originalMax</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).take(1) == model.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="nb">min</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> | 
|  | <span class="nb">max</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, min=0.0, max=1.0, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">MinMaxScaler</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.MinMaxScaler"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="MinMaxScaler.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinMaxScaler.html#pyspark.ml.feature.MinMaxScaler.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="nb">min</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> | 
|  | <span class="nb">max</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"MinMaxScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, min=0.0, max=1.0, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Sets params for this MinMaxScaler.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="MinMaxScaler.setMin"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinMaxScaler.html#pyspark.ml.feature.MinMaxScaler.setMin">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMin</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MinMaxScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`min`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="nb">min</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="MinMaxScaler.setMax"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinMaxScaler.html#pyspark.ml.feature.MinMaxScaler.setMax">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMax</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MinMaxScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`max`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="nb">max</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="MinMaxScaler.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinMaxScaler.html#pyspark.ml.feature.MinMaxScaler.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MinMaxScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="MinMaxScaler.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinMaxScaler.html#pyspark.ml.feature.MinMaxScaler.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MinMaxScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MinMaxScalerModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">MinMaxScalerModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="MinMaxScalerModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinMaxScalerModel.html#pyspark.ml.feature.MinMaxScalerModel">[docs]</a><span class="k">class</span> <span class="nc">MinMaxScalerModel</span><span class="p">(</span> | 
|  | <span class="n">JavaModel</span><span class="p">,</span> <span class="n">_MinMaxScalerParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"MinMaxScalerModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`MinMaxScaler`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.6.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="MinMaxScalerModel.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinMaxScalerModel.html#pyspark.ml.feature.MinMaxScalerModel.setInputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MinMaxScalerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="MinMaxScalerModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinMaxScalerModel.html#pyspark.ml.feature.MinMaxScalerModel.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MinMaxScalerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="MinMaxScalerModel.setMin"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinMaxScalerModel.html#pyspark.ml.feature.MinMaxScalerModel.setMin">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMin</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MinMaxScalerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`min`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="nb">min</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="MinMaxScalerModel.setMax"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.MinMaxScalerModel.html#pyspark.ml.feature.MinMaxScalerModel.setMax">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMax</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"MinMaxScalerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`max`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="nb">max</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">originalMin</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Min value for each original column during fitting.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"originalMin"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">originalMax</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Max value for each original column during fitting.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"originalMax"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="NGram"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.NGram.html#pyspark.ml.feature.NGram">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">NGram</span><span class="p">(</span><span class="n">JavaTransformer</span><span class="p">,</span> <span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"NGram"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    A feature transformer that converts the input array of strings into an array of n-grams. Null</span> | 
|  | <span class="sd">    values in the input array are ignored.</span> | 
|  | <span class="sd">    It returns an array of n-grams where each n-gram is represented by a space-separated string of</span> | 
|  | <span class="sd">    words.</span> | 
|  | <span class="sd">    When the input is empty, an empty array is returned.</span> | 
|  | <span class="sd">    When the input array length is less than n (number of elements per n-gram), no n-grams are</span> | 
|  | <span class="sd">    returned.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.5.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([Row(inputTokens=["a", "b", "c", "d", "e"])])</span> | 
|  | <span class="sd">    >>> ngram = NGram(n=2)</span> | 
|  | <span class="sd">    >>> ngram.setInputCol("inputTokens")</span> | 
|  | <span class="sd">    NGram...</span> | 
|  | <span class="sd">    >>> ngram.setOutputCol("nGrams")</span> | 
|  | <span class="sd">    NGram...</span> | 
|  | <span class="sd">    >>> ngram.transform(df).head()</span> | 
|  | <span class="sd">    Row(inputTokens=['a', 'b', 'c', 'd', 'e'], nGrams=['a b', 'b c', 'c d', 'd e'])</span> | 
|  | <span class="sd">    >>> # Change n-gram length</span> | 
|  | <span class="sd">    >>> ngram.setParams(n=4).transform(df).head()</span> | 
|  | <span class="sd">    Row(inputTokens=['a', 'b', 'c', 'd', 'e'], nGrams=['a b c d', 'b c d e'])</span> | 
|  | <span class="sd">    >>> # Temporarily modify output column.</span> | 
|  | <span class="sd">    >>> ngram.transform(df, {ngram.outputCol: "output"}).head()</span> | 
|  | <span class="sd">    Row(inputTokens=['a', 'b', 'c', 'd', 'e'], output=['a b c d', 'b c d e'])</span> | 
|  | <span class="sd">    >>> ngram.transform(df).head()</span> | 
|  | <span class="sd">    Row(inputTokens=['a', 'b', 'c', 'd', 'e'], nGrams=['a b c d', 'b c d e'])</span> | 
|  | <span class="sd">    >>> # Must use keyword arguments to specify params.</span> | 
|  | <span class="sd">    >>> ngram.setParams("text")</span> | 
|  | <span class="sd">    Traceback (most recent call last):</span> | 
|  | <span class="sd">        ...</span> | 
|  | <span class="sd">    TypeError: Method setParams forces keyword arguments.</span> | 
|  | <span class="sd">    >>> ngramPath = temp_path + "/ngram"</span> | 
|  | <span class="sd">    >>> ngram.save(ngramPath)</span> | 
|  | <span class="sd">    >>> loadedNGram = NGram.load(ngramPath)</span> | 
|  | <span class="sd">    >>> loadedNGram.getN() == ngram.getN()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedNGram.transform(df).take(1) == ngram.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">n</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"n"</span><span class="p">,</span> | 
|  | <span class="s2">"number of elements per n-gram (>=1)"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">n</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, n=2, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">NGram</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.NGram"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">n</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="NGram.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.NGram.html#pyspark.ml.feature.NGram.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">n</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"NGram"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, n=2, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Sets params for this NGram.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="NGram.setN"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.NGram.html#pyspark.ml.feature.NGram.setN">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setN</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"NGram"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`n`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">n</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="NGram.getN"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.NGram.html#pyspark.ml.feature.NGram.getN">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getN</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of n or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">n</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="NGram.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.NGram.html#pyspark.ml.feature.NGram.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"NGram"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="NGram.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.NGram.html#pyspark.ml.feature.NGram.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"NGram"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="Normalizer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Normalizer.html#pyspark.ml.feature.Normalizer">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">Normalizer</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"Normalizer"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Normalize a vector to have unit norm using the given p-norm.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> svec = Vectors.sparse(4, {1: 4.0, 3: 3.0})</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(Vectors.dense([3.0, -4.0]), svec)], ["dense", "sparse"])</span> | 
|  | <span class="sd">    >>> normalizer = Normalizer(p=2.0)</span> | 
|  | <span class="sd">    >>> normalizer.setInputCol("dense")</span> | 
|  | <span class="sd">    Normalizer...</span> | 
|  | <span class="sd">    >>> normalizer.setOutputCol("features")</span> | 
|  | <span class="sd">    Normalizer...</span> | 
|  | <span class="sd">    >>> normalizer.transform(df).head().features</span> | 
|  | <span class="sd">    DenseVector([0.6, -0.8])</span> | 
|  | <span class="sd">    >>> normalizer.setParams(inputCol="sparse", outputCol="freqs").transform(df).head().freqs</span> | 
|  | <span class="sd">    SparseVector(4, {1: 0.8, 3: 0.6})</span> | 
|  | <span class="sd">    >>> params = {normalizer.p: 1.0, normalizer.inputCol: "dense", normalizer.outputCol: "vector"}</span> | 
|  | <span class="sd">    >>> normalizer.transform(df, params).head().vector</span> | 
|  | <span class="sd">    DenseVector([0.4286, -0.5714])</span> | 
|  | <span class="sd">    >>> normalizerPath = temp_path + "/normalizer"</span> | 
|  | <span class="sd">    >>> normalizer.save(normalizerPath)</span> | 
|  | <span class="sd">    >>> loadedNormalizer = Normalizer.load(normalizerPath)</span> | 
|  | <span class="sd">    >>> loadedNormalizer.getP() == normalizer.getP()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedNormalizer.transform(df).take(1) == normalizer.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">p</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span><span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> <span class="s2">"p"</span><span class="p">,</span> <span class="s2">"the p norm value."</span><span class="p">,</span> <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">p</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">2.0</span><span class="p">,</span> <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, p=2.0, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">Normalizer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.Normalizer"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">p</span><span class="o">=</span><span class="mf">2.0</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="Normalizer.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Normalizer.html#pyspark.ml.feature.Normalizer.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">p</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">2.0</span><span class="p">,</span> <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Normalizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, p=2.0, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Sets params for this Normalizer.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Normalizer.setP"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Normalizer.html#pyspark.ml.feature.Normalizer.setP">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setP</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Normalizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`p`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">p</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Normalizer.getP"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Normalizer.html#pyspark.ml.feature.Normalizer.getP">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getP</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of p or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">p</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Normalizer.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Normalizer.html#pyspark.ml.feature.Normalizer.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Normalizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Normalizer.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Normalizer.html#pyspark.ml.feature.Normalizer.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Normalizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_OneHotEncoderParams</span><span class="p">(</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasInputCols</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">,</span> <span class="n">HasOutputCols</span><span class="p">,</span> <span class="n">HasHandleInvalid</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`OneHotEncoder` and :py:class:`OneHotEncoderModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"handleInvalid"</span><span class="p">,</span> | 
|  | <span class="s2">"How to handle invalid data during "</span> | 
|  | <span class="o">+</span> <span class="s2">"transform(). Options are 'keep' (invalid data presented as an extra "</span> | 
|  | <span class="o">+</span> <span class="s2">"categorical feature) or 'error' (throw an error). Note that this Param "</span> | 
|  | <span class="o">+</span> <span class="s2">"is only used during transform; during fitting, invalid data will "</span> | 
|  | <span class="o">+</span> <span class="s2">"result in an error."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">dropLast</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"dropLast"</span><span class="p">,</span> | 
|  | <span class="s2">"whether to drop the last category"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_OneHotEncoderParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="s2">"error"</span><span class="p">,</span> <span class="n">dropLast</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getDropLast</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of dropLast or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dropLast</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoder"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoder.html#pyspark.ml.feature.OneHotEncoder">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">OneHotEncoder</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"OneHotEncoderModel"</span><span class="p">],</span> | 
|  | <span class="n">_OneHotEncoderParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"OneHotEncoder"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    A one-hot encoder that maps a column of category indices to a column of binary vectors, with</span> | 
|  | <span class="sd">    at most a single one-value per row that indicates the input category index.</span> | 
|  | <span class="sd">    For example with 5 categories, an input value of 2.0 would map to an output vector of</span> | 
|  | <span class="sd">    `[0.0, 0.0, 1.0, 0.0]`.</span> | 
|  | <span class="sd">    The last category is not included by default (configurable via :py:attr:`dropLast`),</span> | 
|  | <span class="sd">    because including it makes the vector entries sum up to one, and hence linearly dependent.</span> | 
|  | <span class="sd">    So an input value of 4.0 maps to `[0.0, 0.0, 0.0, 0.0]`.</span> | 
|  |  | 
|  | <span class="sd">    When :py:attr:`handleInvalid` is configured to 'keep', an extra "category" indicating invalid</span> | 
|  | <span class="sd">    values is added as the last category. So when :py:attr:`dropLast` is true, invalid values are</span> | 
|  | <span class="sd">    encoded as an all-zeros vector.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.3.0</span> | 
|  |  | 
|  | <span class="sd">    Notes</span> | 
|  | <span class="sd">    -----</span> | 
|  | <span class="sd">    This is different from scikit-learn's OneHotEncoder, which keeps all categories.</span> | 
|  | <span class="sd">    The output vectors are sparse.</span> | 
|  |  | 
|  | <span class="sd">    When encoding multi-column by using :py:attr:`inputCols` and</span> | 
|  | <span class="sd">    :py:attr:`outputCols` params, input/output cols come in pairs, specified by the order in</span> | 
|  | <span class="sd">    the arrays, and each pair is treated independently.</span> | 
|  |  | 
|  | <span class="sd">    See Also</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    StringIndexer : for converting categorical values into category indices</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(0.0,), (1.0,), (2.0,)], ["input"])</span> | 
|  | <span class="sd">    >>> ohe = OneHotEncoder()</span> | 
|  | <span class="sd">    >>> ohe.setInputCols(["input"])</span> | 
|  | <span class="sd">    OneHotEncoder...</span> | 
|  | <span class="sd">    >>> ohe.setOutputCols(["output"])</span> | 
|  | <span class="sd">    OneHotEncoder...</span> | 
|  | <span class="sd">    >>> model = ohe.fit(df)</span> | 
|  | <span class="sd">    >>> model.setOutputCols(["output"])</span> | 
|  | <span class="sd">    OneHotEncoderModel...</span> | 
|  | <span class="sd">    >>> model.getHandleInvalid()</span> | 
|  | <span class="sd">    'error'</span> | 
|  | <span class="sd">    >>> model.transform(df).head().output</span> | 
|  | <span class="sd">    SparseVector(2, {0: 1.0})</span> | 
|  | <span class="sd">    >>> single_col_ohe = OneHotEncoder(inputCol="input", outputCol="output")</span> | 
|  | <span class="sd">    >>> single_col_model = single_col_ohe.fit(df)</span> | 
|  | <span class="sd">    >>> single_col_model.transform(df).head().output</span> | 
|  | <span class="sd">    SparseVector(2, {0: 1.0})</span> | 
|  | <span class="sd">    >>> ohePath = temp_path + "/ohe"</span> | 
|  | <span class="sd">    >>> ohe.save(ohePath)</span> | 
|  | <span class="sd">    >>> loadedOHE = OneHotEncoder.load(ohePath)</span> | 
|  | <span class="sd">    >>> loadedOHE.getInputCols() == ohe.getInputCols()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/ohe-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = OneHotEncoderModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.categorySizes == model.categorySizes</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).take(1) == model.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">dropLast</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">dropLast</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="n">dropLast</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, inputCols=None, outputCols=None, handleInvalid="error", dropLast=True, \</span> | 
|  | <span class="sd">                 inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">OneHotEncoder</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.OneHotEncoder"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">dropLast</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"OneHotEncoder"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">dropLast</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"OneHotEncoder"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoder.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoder.html#pyspark.ml.feature.OneHotEncoder.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="n">dropLast</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"OneHotEncoder"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, inputCols=None, outputCols=None, handleInvalid="error", \</span> | 
|  | <span class="sd">                  dropLast=True, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Sets params for this OneHotEncoder.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoder.setDropLast"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoder.html#pyspark.ml.feature.OneHotEncoder.setDropLast">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setDropLast</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"OneHotEncoder"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`dropLast`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">dropLast</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoder.setInputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoder.html#pyspark.ml.feature.OneHotEncoder.setInputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"OneHotEncoder"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoder.setOutputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoder.html#pyspark.ml.feature.OneHotEncoder.setOutputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"OneHotEncoder"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoder.setHandleInvalid"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoder.html#pyspark.ml.feature.OneHotEncoder.setHandleInvalid">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setHandleInvalid</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"OneHotEncoder"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`handleInvalid`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoder.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoder.html#pyspark.ml.feature.OneHotEncoder.setInputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"OneHotEncoder"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoder.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoder.html#pyspark.ml.feature.OneHotEncoder.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"OneHotEncoder"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"OneHotEncoderModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">OneHotEncoderModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoderModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoderModel.html#pyspark.ml.feature.OneHotEncoderModel">[docs]</a><span class="k">class</span> <span class="nc">OneHotEncoderModel</span><span class="p">(</span> | 
|  | <span class="n">JavaModel</span><span class="p">,</span> <span class="n">_OneHotEncoderParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"OneHotEncoderModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`OneHotEncoder`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.3.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoderModel.setDropLast"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoderModel.html#pyspark.ml.feature.OneHotEncoderModel.setDropLast">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setDropLast</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"OneHotEncoderModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`dropLast`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">dropLast</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoderModel.setInputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoderModel.html#pyspark.ml.feature.OneHotEncoderModel.setInputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"OneHotEncoderModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoderModel.setOutputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoderModel.html#pyspark.ml.feature.OneHotEncoderModel.setOutputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"OneHotEncoderModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoderModel.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoderModel.html#pyspark.ml.feature.OneHotEncoderModel.setInputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"OneHotEncoderModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoderModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoderModel.html#pyspark.ml.feature.OneHotEncoderModel.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"OneHotEncoderModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="OneHotEncoderModel.setHandleInvalid"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.OneHotEncoderModel.html#pyspark.ml.feature.OneHotEncoderModel.setHandleInvalid">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setHandleInvalid</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"OneHotEncoderModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`handleInvalid`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">categorySizes</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Original number of categories for each feature being encoded.</span> | 
|  | <span class="sd">        The array contains one value for each input column, in order.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"categorySizes"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="PolynomialExpansion"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PolynomialExpansion.html#pyspark.ml.feature.PolynomialExpansion">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">PolynomialExpansion</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"PolynomialExpansion"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Perform feature expansion in a polynomial space. As said in `wikipedia of Polynomial Expansion</span> | 
|  | <span class="sd">    &lt;http://en.wikipedia.org/wiki/Polynomial_expansion&gt;`_, "In mathematics, an</span> | 
|  | <span class="sd">    expansion of a product of sums expresses it as a sum of products by using the fact that</span> | 
|  | <span class="sd">    multiplication distributes over addition". Take a 2-variable feature vector as an example:</span> | 
|  | <span class="sd">    `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(Vectors.dense([0.5, 2.0]),)], ["dense"])</span> | 
|  | <span class="sd">    >>> px = PolynomialExpansion(degree=2)</span> | 
|  | <span class="sd">    >>> px.setInputCol("dense")</span> | 
|  | <span class="sd">    PolynomialExpansion...</span> | 
|  | <span class="sd">    >>> px.setOutputCol("expanded")</span> | 
|  | <span class="sd">    PolynomialExpansion...</span> | 
|  | <span class="sd">    >>> px.transform(df).head().expanded</span> | 
|  | <span class="sd">    DenseVector([0.5, 0.25, 2.0, 1.0, 4.0])</span> | 
|  | <span class="sd">    >>> px.setParams(outputCol="test").transform(df).head().test</span> | 
|  | <span class="sd">    DenseVector([0.5, 0.25, 2.0, 1.0, 4.0])</span> | 
|  | <span class="sd">    >>> polyExpansionPath = temp_path + "/poly-expansion"</span> | 
|  | <span class="sd">    >>> px.save(polyExpansionPath)</span> | 
|  | <span class="sd">    >>> loadedPx = PolynomialExpansion.load(polyExpansionPath)</span> | 
|  | <span class="sd">    >>> loadedPx.getDegree() == px.getDegree()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedPx.transform(df).take(1) == px.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">degree</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"degree"</span><span class="p">,</span> | 
|  | <span class="s2">"the polynomial degree to expand (>= 1)"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">degree</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, degree=2, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">PolynomialExpansion</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> | 
|  | <span class="s2">"org.apache.spark.ml.feature.PolynomialExpansion"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> | 
|  | <span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">degree</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="PolynomialExpansion.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PolynomialExpansion.html#pyspark.ml.feature.PolynomialExpansion.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">degree</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"PolynomialExpansion"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, degree=2, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Sets params for this PolynomialExpansion.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PolynomialExpansion.setDegree"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PolynomialExpansion.html#pyspark.ml.feature.PolynomialExpansion.setDegree">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setDegree</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PolynomialExpansion"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`degree`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">degree</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PolynomialExpansion.getDegree"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PolynomialExpansion.html#pyspark.ml.feature.PolynomialExpansion.getDegree">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getDegree</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of degree or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">degree</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PolynomialExpansion.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PolynomialExpansion.html#pyspark.ml.feature.PolynomialExpansion.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PolynomialExpansion"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PolynomialExpansion.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PolynomialExpansion.html#pyspark.ml.feature.PolynomialExpansion.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PolynomialExpansion"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="QuantileDiscretizer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.QuantileDiscretizer.html#pyspark.ml.feature.QuantileDiscretizer">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">QuantileDiscretizer</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">HasInputCols</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCols</span><span class="p">,</span> | 
|  | <span class="n">HasHandleInvalid</span><span class="p">,</span> | 
|  | <span class="n">HasRelativeError</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"QuantileDiscretizer"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    :py:class:`QuantileDiscretizer` takes a column with continuous features and outputs a column</span> | 
|  | <span class="sd">    with binned categorical features. The number of bins can be set using the :py:attr:`numBuckets`</span> | 
|  | <span class="sd">    parameter. It is possible that the number of buckets used will be less than this value, for</span> | 
|  | <span class="sd">    example, if there are too few distinct values of the input to create enough distinct quantiles.</span> | 
|  | <span class="sd">    Since 3.0.0, :py:class:`QuantileDiscretizer` can map multiple columns at once by setting the</span> | 
|  | <span class="sd">    :py:attr:`inputCols` parameter. If both the :py:attr:`inputCol` and :py:attr:`inputCols`</span> | 
|  | <span class="sd">    parameters are set, an Exception will be thrown. To specify the number of buckets for each</span> | 
|  | <span class="sd">    column, the :py:attr:`numBucketsArray` parameter can be set, or if the number of buckets</span> | 
|  | <span class="sd">    should be the same across columns, :py:attr:`numBuckets` can be set as a convenience.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.0.0</span> | 
|  |  | 
|  | <span class="sd">    Notes</span> | 
|  | <span class="sd">    -----</span> | 
|  | <span class="sd">    NaN handling: By default,</span> | 
|  | <span class="sd">    :py:class:`QuantileDiscretizer` will raise an error when it finds NaN values in the dataset,</span> | 
|  | <span class="sd">    but the user can also choose to either keep or remove NaN values within the dataset by setting</span> | 
|  | <span class="sd">    :py:attr:`handleInvalid` parameter. If the user chooses to keep NaN values, they will be</span> | 
|  | <span class="sd">    handled specially and placed into their own bucket, for example, if 4 buckets are used, then</span> | 
|  | <span class="sd">    non-NaN data will be put into buckets[0-3], but NaNs will be counted in a special bucket[4].</span> | 
|  |  | 
|  | <span class="sd">    Algorithm: The bin ranges are chosen using an approximate algorithm (see the documentation for</span> | 
|  | <span class="sd">    :py:meth:`pyspark.sql.DataFrameStatFunctions.approxQuantile` for a detailed description).</span> | 
|  | <span class="sd">    The precision of the approximation can be controlled with the</span> | 
|  | <span class="sd">    :py:attr:`relativeError` parameter.</span> | 
|  | <span class="sd">    The lower and upper bin bounds will be `-Infinity` and `+Infinity`, covering all real values.</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> values = [(0.1,), (0.4,), (1.2,), (1.5,), (float("nan"),), (float("nan"),)]</span> | 
|  | <span class="sd">    >>> df1 = spark.createDataFrame(values, ["values"])</span> | 
|  | <span class="sd">    >>> qds1 = QuantileDiscretizer(inputCol="values", outputCol="buckets")</span> | 
|  | <span class="sd">    >>> qds1.setNumBuckets(2)</span> | 
|  | <span class="sd">    QuantileDiscretizer...</span> | 
|  | <span class="sd">    >>> qds1.setRelativeError(0.01)</span> | 
|  | <span class="sd">    QuantileDiscretizer...</span> | 
|  | <span class="sd">    >>> qds1.setHandleInvalid("error")</span> | 
|  | <span class="sd">    QuantileDiscretizer...</span> | 
|  | <span class="sd">    >>> qds1.getRelativeError()</span> | 
|  | <span class="sd">    0.01</span> | 
|  | <span class="sd">    >>> bucketizer = qds1.fit(df1)</span> | 
|  | <span class="sd">    >>> qds1.setHandleInvalid("keep").fit(df1).transform(df1).count()</span> | 
|  | <span class="sd">    6</span> | 
|  | <span class="sd">    >>> qds1.setHandleInvalid("skip").fit(df1).transform(df1).count()</span> | 
|  | <span class="sd">    4</span> | 
|  | <span class="sd">    >>> splits = bucketizer.getSplits()</span> | 
|  | <span class="sd">    >>> splits[0]</span> | 
|  | <span class="sd">    -inf</span> | 
|  | <span class="sd">    >>> print("%2.1f" % round(splits[1], 1))</span> | 
|  | <span class="sd">    0.4</span> | 
|  | <span class="sd">    >>> bucketed = bucketizer.transform(df1).head()</span> | 
|  | <span class="sd">    >>> bucketed.buckets</span> | 
|  | <span class="sd">    0.0</span> | 
|  | <span class="sd">    >>> quantileDiscretizerPath = temp_path + "/quantile-discretizer"</span> | 
|  | <span class="sd">    >>> qds1.save(quantileDiscretizerPath)</span> | 
|  | <span class="sd">    >>> loadedQds = QuantileDiscretizer.load(quantileDiscretizerPath)</span> | 
|  | <span class="sd">    >>> loadedQds.getNumBuckets() == qds1.getNumBuckets()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> inputs = [(0.1, 0.0), (0.4, 1.0), (1.2, 1.3), (1.5, 1.5),</span> | 
|  | <span class="sd">    ...     (float("nan"), float("nan")), (float("nan"), float("nan"))]</span> | 
|  | <span class="sd">    >>> df2 = spark.createDataFrame(inputs, ["input1", "input2"])</span> | 
|  | <span class="sd">    >>> qds2 = QuantileDiscretizer(relativeError=0.01, handleInvalid="error", numBuckets=2,</span> | 
|  | <span class="sd">    ...     inputCols=["input1", "input2"], outputCols=["output1", "output2"])</span> | 
|  | <span class="sd">    >>> qds2.getRelativeError()</span> | 
|  | <span class="sd">    0.01</span> | 
|  | <span class="sd">    >>> qds2.setHandleInvalid("keep").fit(df2).transform(df2).show()</span> | 
|  | <span class="sd">    +------+------+-------+-------+</span> | 
|  | <span class="sd">    |input1|input2|output1|output2|</span> | 
|  | <span class="sd">    +------+------+-------+-------+</span> | 
|  | <span class="sd">    |   0.1|   0.0|    0.0|    0.0|</span> | 
|  | <span class="sd">    |   0.4|   1.0|    1.0|    1.0|</span> | 
|  | <span class="sd">    |   1.2|   1.3|    1.0|    1.0|</span> | 
|  | <span class="sd">    |   1.5|   1.5|    1.0|    1.0|</span> | 
|  | <span class="sd">    |   NaN|   NaN|    2.0|    2.0|</span> | 
|  | <span class="sd">    |   NaN|   NaN|    2.0|    2.0|</span> | 
|  | <span class="sd">    +------+------+-------+-------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> qds3 = QuantileDiscretizer(relativeError=0.01, handleInvalid="error",</span> | 
|  | <span class="sd">    ...      numBucketsArray=[5, 10], inputCols=["input1", "input2"],</span> | 
|  | <span class="sd">    ...      outputCols=["output1", "output2"])</span> | 
|  | <span class="sd">    >>> qds3.setHandleInvalid("skip").fit(df2).transform(df2).show()</span> | 
|  | <span class="sd">    +------+------+-------+-------+</span> | 
|  | <span class="sd">    |input1|input2|output1|output2|</span> | 
|  | <span class="sd">    +------+------+-------+-------+</span> | 
|  | <span class="sd">    |   0.1|   0.0|    1.0|    1.0|</span> | 
|  | <span class="sd">    |   0.4|   1.0|    2.0|    2.0|</span> | 
|  | <span class="sd">    |   1.2|   1.3|    3.0|    3.0|</span> | 
|  | <span class="sd">    |   1.5|   1.5|    4.0|    4.0|</span> | 
|  | <span class="sd">    +------+------+-------+-------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">numBuckets</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"numBuckets"</span><span class="p">,</span> | 
|  | <span class="s2">"Maximum number of buckets (quantiles, or "</span> | 
|  | <span class="o">+</span> <span class="s2">"categories) into which data points are grouped. Must be >= 2."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"handleInvalid"</span><span class="p">,</span> | 
|  | <span class="s2">"how to handle invalid entries. "</span> | 
|  | <span class="o">+</span> <span class="s2">"Options are skip (filter out rows with invalid values), "</span> | 
|  | <span class="o">+</span> <span class="s2">"error (throw an error), or keep (keep invalid values in a special "</span> | 
|  | <span class="o">+</span> <span class="s2">"additional bucket). Note that in the multiple columns "</span> | 
|  | <span class="o">+</span> <span class="s2">"case, the invalid handling is applied to all columns. That said "</span> | 
|  | <span class="o">+</span> <span class="s2">"for 'error' it will throw an error if any invalids are found in "</span> | 
|  | <span class="o">+</span> <span class="s2">"any columns, for 'skip' it will skip rows with any invalids in "</span> | 
|  | <span class="o">+</span> <span class="s2">"any columns, etc."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">numBucketsArray</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"numBucketsArray"</span><span class="p">,</span> | 
|  | <span class="s2">"Array of number of buckets "</span> | 
|  | <span class="o">+</span> <span class="s2">"(quantiles, or categories) into which data points are grouped. "</span> | 
|  | <span class="o">+</span> <span class="s2">"This is for multiple columns input. If transforming multiple "</span> | 
|  | <span class="o">+</span> <span class="s2">"columns and numBucketsArray is not set, but numBuckets is set, "</span> | 
|  | <span class="o">+</span> <span class="s2">"then numBuckets will be applied across all columns."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toListInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">numBuckets</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">numBucketsArray</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">numBuckets</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.001</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="n">numBucketsArray</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, numBuckets=2, inputCol=None, outputCol=None, relativeError=0.001, \</span> | 
|  | <span class="sd">                 handleInvalid="error", numBucketsArray=None, inputCols=None, outputCols=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">QuantileDiscretizer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> | 
|  | <span class="s2">"org.apache.spark.ml.feature.QuantileDiscretizer"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> | 
|  | <span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">numBuckets</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">relativeError</span><span class="o">=</span><span class="mf">0.001</span><span class="p">,</span> <span class="n">handleInvalid</span><span class="o">=</span><span class="s2">"error"</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">numBuckets</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"QuantileDiscretizer"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">numBucketsArray</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"QuantileDiscretizer"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <div class="viewcode-block" id="QuantileDiscretizer.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.QuantileDiscretizer.html#pyspark.ml.feature.QuantileDiscretizer.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">numBuckets</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.001</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="n">numBucketsArray</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"QuantileDiscretizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, numBuckets=2, inputCol=None, outputCol=None, relativeError=0.001, \</span> | 
|  | <span class="sd">                  handleInvalid="error", numBucketsArray=None, inputCols=None, outputCols=None)</span> | 
|  | <span class="sd">        Sets the params for the QuantileDiscretizer.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="QuantileDiscretizer.setNumBuckets"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.QuantileDiscretizer.html#pyspark.ml.feature.QuantileDiscretizer.setNumBuckets">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setNumBuckets</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"QuantileDiscretizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`numBuckets`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">numBuckets</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="QuantileDiscretizer.getNumBuckets"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.QuantileDiscretizer.html#pyspark.ml.feature.QuantileDiscretizer.getNumBuckets">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getNumBuckets</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of numBuckets or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">numBuckets</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="QuantileDiscretizer.setNumBucketsArray"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.QuantileDiscretizer.html#pyspark.ml.feature.QuantileDiscretizer.setNumBucketsArray">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setNumBucketsArray</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"QuantileDiscretizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`numBucketsArray`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">numBucketsArray</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="QuantileDiscretizer.getNumBucketsArray"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.QuantileDiscretizer.html#pyspark.ml.feature.QuantileDiscretizer.getNumBucketsArray">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getNumBucketsArray</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of numBucketsArray or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">numBucketsArray</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="QuantileDiscretizer.setRelativeError"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.QuantileDiscretizer.html#pyspark.ml.feature.QuantileDiscretizer.setRelativeError">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setRelativeError</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"QuantileDiscretizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`relativeError`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">relativeError</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="QuantileDiscretizer.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.QuantileDiscretizer.html#pyspark.ml.feature.QuantileDiscretizer.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"QuantileDiscretizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="QuantileDiscretizer.setInputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.QuantileDiscretizer.html#pyspark.ml.feature.QuantileDiscretizer.setInputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"QuantileDiscretizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="QuantileDiscretizer.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.QuantileDiscretizer.html#pyspark.ml.feature.QuantileDiscretizer.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"QuantileDiscretizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="QuantileDiscretizer.setOutputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.QuantileDiscretizer.html#pyspark.ml.feature.QuantileDiscretizer.setOutputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"QuantileDiscretizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="QuantileDiscretizer.setHandleInvalid"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.QuantileDiscretizer.html#pyspark.ml.feature.QuantileDiscretizer.setHandleInvalid">[docs]</a>    <span class="k">def</span> <span class="nf">setHandleInvalid</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"QuantileDiscretizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`handleInvalid`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="n">Bucketizer</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Private method to convert the java_model to a Python model.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">isSet</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">inputCol</span><span class="p">):</span> | 
|  | <span class="k">return</span> <span class="n">Bucketizer</span><span class="p">(</span> | 
|  | <span class="n">splits</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="n">java_model</span><span class="o">.</span><span class="n">getSplits</span><span class="p">()),</span> | 
|  | <span class="n">inputCol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">getInputCol</span><span class="p">(),</span> | 
|  | <span class="n">outputCol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">getOutputCol</span><span class="p">(),</span> | 
|  | <span class="n">handleInvalid</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">getHandleInvalid</span><span class="p">(),</span> | 
|  | <span class="p">)</span> | 
|  | <span class="k">else</span><span class="p">:</span> | 
|  | <span class="n">splitsArrayList</span> <span class="o">=</span> <span class="p">[</span><span class="nb">list</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="n">java_model</span><span class="o">.</span><span class="n">getSplitsArray</span><span class="p">())]</span> | 
|  | <span class="k">return</span> <span class="n">Bucketizer</span><span class="p">(</span> | 
|  | <span class="n">splitsArray</span><span class="o">=</span><span class="n">splitsArrayList</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">getInputCols</span><span class="p">(),</span> | 
|  | <span class="n">outputCols</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">getOutputCols</span><span class="p">(),</span> | 
|  | <span class="n">handleInvalid</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">getHandleInvalid</span><span class="p">(),</span> | 
|  | <span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_RobustScalerParams</span><span class="p">(</span><span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">,</span> <span class="n">HasRelativeError</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`RobustScaler` and :py:class:`RobustScalerModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">lower</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"lower"</span><span class="p">,</span> | 
|  | <span class="s2">"Lower quantile to calculate quantile range"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">upper</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"upper"</span><span class="p">,</span> | 
|  | <span class="s2">"Upper quantile to calculate quantile range"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">withCentering</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"withCentering"</span><span class="p">,</span> | 
|  | <span class="s2">"Whether to center data with median"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">withScaling</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"withScaling"</span><span class="p">,</span> | 
|  | <span class="s2">"Whether to scale the data to "</span> <span class="s2">"quantile range"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_RobustScalerParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> | 
|  | <span class="n">lower</span><span class="o">=</span><span class="mf">0.25</span><span class="p">,</span> <span class="n">upper</span><span class="o">=</span><span class="mf">0.75</span><span class="p">,</span> <span class="n">withCentering</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">withScaling</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">relativeError</span><span class="o">=</span><span class="mf">0.001</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getLower</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of lower or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lower</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getUpper</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of upper or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">upper</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getWithCentering</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of withCentering or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">withCentering</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getWithScaling</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of withScaling or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">withScaling</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="RobustScaler"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RobustScaler.html#pyspark.ml.feature.RobustScaler">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">RobustScaler</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">,</span> <span class="n">_RobustScalerParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"RobustScaler"</span><span class="p">],</span> <span class="n">JavaMLWritable</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    RobustScaler removes the median (only when withCentering is enabled; it is off by default) and scales the data according to the quantile range.</span> | 
|  | <span class="sd">    The quantile range is by default IQR (Interquartile Range, quantile range between the</span> | 
|  | <span class="sd">    1st quartile = 25th percentile and the 3rd quartile = 75th percentile) but can be configured.</span> | 
|  | <span class="sd">    Centering and scaling happen independently on each feature by computing the relevant</span> | 
|  | <span class="sd">    statistics on the samples in the training set. Median and quantile range are then</span> | 
|  | <span class="sd">    stored to be used on later data using the transform method.</span> | 
|  | <span class="sd">    Note that NaN values are ignored in the computation of medians and ranges.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> data = [(0, Vectors.dense([0.0, 0.0]),),</span> | 
|  | <span class="sd">    ...         (1, Vectors.dense([1.0, -1.0]),),</span> | 
|  | <span class="sd">    ...         (2, Vectors.dense([2.0, -2.0]),),</span> | 
|  | <span class="sd">    ...         (3, Vectors.dense([3.0, -3.0]),),</span> | 
|  | <span class="sd">    ...         (4, Vectors.dense([4.0, -4.0]),),]</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(data, ["id", "features"])</span> | 
|  | <span class="sd">    >>> scaler = RobustScaler()</span> | 
|  | <span class="sd">    >>> scaler.setInputCol("features")</span> | 
|  | <span class="sd">    RobustScaler...</span> | 
|  | <span class="sd">    >>> scaler.setOutputCol("scaled")</span> | 
|  | <span class="sd">    RobustScaler...</span> | 
|  | <span class="sd">    >>> model = scaler.fit(df)</span> | 
|  | <span class="sd">    >>> model.setOutputCol("output")</span> | 
|  | <span class="sd">    RobustScalerModel...</span> | 
|  | <span class="sd">    >>> model.median</span> | 
|  | <span class="sd">    DenseVector([2.0, -2.0])</span> | 
|  | <span class="sd">    >>> model.range</span> | 
|  | <span class="sd">    DenseVector([2.0, 2.0])</span> | 
|  | <span class="sd">    >>> model.transform(df).collect()[1].output</span> | 
|  | <span class="sd">    DenseVector([0.5, -0.5])</span> | 
|  | <span class="sd">    >>> scalerPath = temp_path + "/robust-scaler"</span> | 
|  | <span class="sd">    >>> scaler.save(scalerPath)</span> | 
|  | <span class="sd">    >>> loadedScaler = RobustScaler.load(scalerPath)</span> | 
|  | <span class="sd">    >>> loadedScaler.getWithCentering() == scaler.getWithCentering()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedScaler.getWithScaling() == scaler.getWithScaling()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/robust-scaler-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = RobustScalerModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.median == model.median</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.range == model.range</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).take(1) == model.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">lower</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.25</span><span class="p">,</span> | 
|  | <span class="n">upper</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.75</span><span class="p">,</span> | 
|  | <span class="n">withCentering</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">withScaling</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.001</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, lower=0.25, upper=0.75, withCentering=False, withScaling=True, \</span> | 
|  | <span class="sd">                 inputCol=None, outputCol=None, relativeError=0.001)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">RobustScaler</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.RobustScaler"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="RobustScaler.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RobustScaler.html#pyspark.ml.feature.RobustScaler.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">lower</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.25</span><span class="p">,</span> | 
|  | <span class="n">upper</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.75</span><span class="p">,</span> | 
|  | <span class="n">withCentering</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">withScaling</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">relativeError</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.001</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"RobustScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, lower=0.25, upper=0.75, withCentering=False, withScaling=True, \</span> | 
|  | <span class="sd">                  inputCol=None, outputCol=None, relativeError=0.001)</span> | 
|  | <span class="sd">        Sets params for this RobustScaler.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RobustScaler.setLower"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RobustScaler.html#pyspark.ml.feature.RobustScaler.setLower">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setLower</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RobustScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`lower`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">lower</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RobustScaler.setUpper"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RobustScaler.html#pyspark.ml.feature.RobustScaler.setUpper">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setUpper</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RobustScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`upper`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">upper</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RobustScaler.setWithCentering"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RobustScaler.html#pyspark.ml.feature.RobustScaler.setWithCentering">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setWithCentering</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RobustScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`withCentering`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">withCentering</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RobustScaler.setWithScaling"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RobustScaler.html#pyspark.ml.feature.RobustScaler.setWithScaling">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setWithScaling</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RobustScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`withScaling`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">withScaling</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RobustScaler.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RobustScaler.html#pyspark.ml.feature.RobustScaler.setInputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RobustScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RobustScaler.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RobustScaler.html#pyspark.ml.feature.RobustScaler.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RobustScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RobustScaler.setRelativeError"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RobustScaler.html#pyspark.ml.feature.RobustScaler.setRelativeError">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setRelativeError</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RobustScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`relativeError`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">relativeError</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RobustScalerModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">RobustScalerModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="RobustScalerModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RobustScalerModel.html#pyspark.ml.feature.RobustScalerModel">[docs]</a><span class="k">class</span> <span class="nc">RobustScalerModel</span><span class="p">(</span> | 
|  | <span class="n">JavaModel</span><span class="p">,</span> <span class="n">_RobustScalerParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"RobustScalerModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`RobustScaler`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="RobustScalerModel.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RobustScalerModel.html#pyspark.ml.feature.RobustScalerModel.setInputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RobustScalerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RobustScalerModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RobustScalerModel.html#pyspark.ml.feature.RobustScalerModel.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RobustScalerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">median</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Median of the RobustScalerModel.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"median"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">range</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Quantile range of the RobustScalerModel.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"range"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="RegexTokenizer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RegexTokenizer.html#pyspark.ml.feature.RegexTokenizer">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">RegexTokenizer</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"RegexTokenizer"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    A regex based tokenizer that extracts tokens either by using the</span> | 
|  | <span class="sd">    provided regex pattern (in Java dialect) to split the text</span> | 
|  | <span class="sd">    (default) or repeatedly matching the regex (if gaps is false).</span> | 
|  | <span class="sd">    Optional parameters also allow filtering tokens using a minimal</span> | 
|  | <span class="sd">    length.</span> | 
|  | <span class="sd">    It returns an array of strings that can be empty.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([("A B  c",)], ["text"])</span> | 
|  | <span class="sd">    >>> reTokenizer = RegexTokenizer()</span> | 
|  | <span class="sd">    >>> reTokenizer.setInputCol("text")</span> | 
|  | <span class="sd">    RegexTokenizer...</span> | 
|  | <span class="sd">    >>> reTokenizer.setOutputCol("words")</span> | 
|  | <span class="sd">    RegexTokenizer...</span> | 
|  | <span class="sd">    >>> reTokenizer.transform(df).head()</span> | 
|  | <span class="sd">    Row(text='A B  c', words=['a', 'b', 'c'])</span> | 
|  | <span class="sd">    >>> # Change a parameter.</span> | 
|  | <span class="sd">    >>> reTokenizer.setParams(outputCol="tokens").transform(df).head()</span> | 
|  | <span class="sd">    Row(text='A B  c', tokens=['a', 'b', 'c'])</span> | 
|  | <span class="sd">    >>> # Temporarily modify a parameter.</span> | 
|  | <span class="sd">    >>> reTokenizer.transform(df, {reTokenizer.outputCol: "words"}).head()</span> | 
|  | <span class="sd">    Row(text='A B  c', words=['a', 'b', 'c'])</span> | 
|  | <span class="sd">    >>> reTokenizer.transform(df).head()</span> | 
|  | <span class="sd">    Row(text='A B  c', tokens=['a', 'b', 'c'])</span> | 
|  | <span class="sd">    >>> # Must use keyword arguments to specify params.</span> | 
|  | <span class="sd">    >>> reTokenizer.setParams("text")</span> | 
|  | <span class="sd">    Traceback (most recent call last):</span> | 
|  | <span class="sd">        ...</span> | 
|  | <span class="sd">    TypeError: Method setParams forces keyword arguments.</span> | 
|  | <span class="sd">    >>> regexTokenizerPath = temp_path + "/regex-tokenizer"</span> | 
|  | <span class="sd">    >>> reTokenizer.save(regexTokenizerPath)</span> | 
|  | <span class="sd">    >>> loadedReTokenizer = RegexTokenizer.load(regexTokenizerPath)</span> | 
|  | <span class="sd">    >>> loadedReTokenizer.getMinTokenLength() == reTokenizer.getMinTokenLength()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedReTokenizer.getGaps() == reTokenizer.getGaps()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedReTokenizer.transform(df).take(1) == reTokenizer.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">minTokenLength</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"minTokenLength"</span><span class="p">,</span> | 
|  | <span class="s2">"minimum token length (>= 0)"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">gaps</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"gaps"</span><span class="p">,</span> | 
|  | <span class="s2">"whether regex splits on gaps (True) or matches tokens "</span> <span class="o">+</span> <span class="s2">"(False)"</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">pattern</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"pattern"</span><span class="p">,</span> | 
|  | <span class="s2">"regex pattern (Java dialect) used for tokenizing"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">toLowercase</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"toLowercase"</span><span class="p">,</span> | 
|  | <span class="s2">"whether to convert all characters to "</span> <span class="o">+</span> <span class="s2">"lowercase before tokenizing"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">minTokenLength</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">gaps</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">pattern</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"</span><span class="se">\\</span><span class="s2">s+"</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">toLowercase</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, \</span> | 
|  | <span class="sd">                 outputCol=None, toLowercase=True)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">RegexTokenizer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.RegexTokenizer"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">minTokenLength</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">gaps</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">pattern</span><span class="o">=</span><span class="s2">"</span><span class="se">\\</span><span class="s2">s+"</span><span class="p">,</span> <span class="n">toLowercase</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="RegexTokenizer.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RegexTokenizer.html#pyspark.ml.feature.RegexTokenizer.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">minTokenLength</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">gaps</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">pattern</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"</span><span class="se">\\</span><span class="s2">s+"</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">toLowercase</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"RegexTokenizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, \</span> | 
|  | <span class="sd">                  outputCol=None, toLowercase=True)</span> | 
|  | <span class="sd">        Sets params for this RegexTokenizer.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RegexTokenizer.setMinTokenLength"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RegexTokenizer.html#pyspark.ml.feature.RegexTokenizer.setMinTokenLength">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMinTokenLength</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RegexTokenizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`minTokenLength`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minTokenLength</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RegexTokenizer.getMinTokenLength"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RegexTokenizer.html#pyspark.ml.feature.RegexTokenizer.getMinTokenLength">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getMinTokenLength</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of minTokenLength or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">minTokenLength</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RegexTokenizer.setGaps"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RegexTokenizer.html#pyspark.ml.feature.RegexTokenizer.setGaps">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setGaps</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RegexTokenizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`gaps`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">gaps</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RegexTokenizer.getGaps"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RegexTokenizer.html#pyspark.ml.feature.RegexTokenizer.getGaps">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getGaps</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of gaps or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">gaps</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RegexTokenizer.setPattern"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RegexTokenizer.html#pyspark.ml.feature.RegexTokenizer.setPattern">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setPattern</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RegexTokenizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`pattern`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">pattern</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RegexTokenizer.getPattern"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RegexTokenizer.html#pyspark.ml.feature.RegexTokenizer.getPattern">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getPattern</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of pattern or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pattern</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RegexTokenizer.setToLowercase"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RegexTokenizer.html#pyspark.ml.feature.RegexTokenizer.setToLowercase">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setToLowercase</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RegexTokenizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`toLowercase`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">toLowercase</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RegexTokenizer.getToLowercase"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RegexTokenizer.html#pyspark.ml.feature.RegexTokenizer.getToLowercase">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getToLowercase</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of toLowercase or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">toLowercase</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RegexTokenizer.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RegexTokenizer.html#pyspark.ml.feature.RegexTokenizer.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RegexTokenizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RegexTokenizer.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RegexTokenizer.html#pyspark.ml.feature.RegexTokenizer.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RegexTokenizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="SQLTransformer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.SQLTransformer.html#pyspark.ml.feature.SQLTransformer">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">SQLTransformer</span><span class="p">(</span><span class="n">JavaTransformer</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"SQLTransformer"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Implements the transforms which are defined by a SQL statement.</span> | 
|  | <span class="sd">    Currently we only support SQL syntax like `SELECT ... FROM __THIS__`</span> | 
|  | <span class="sd">    where `__THIS__` represents the underlying table of the input dataset.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.6.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(0, 1.0, 3.0), (2, 2.0, 5.0)], ["id", "v1", "v2"])</span> | 
|  | <span class="sd">    >>> sqlTrans = SQLTransformer(</span> | 
|  | <span class="sd">    ...     statement="SELECT *, (v1 + v2) AS v3, (v1 * v2) AS v4 FROM __THIS__")</span> | 
|  | <span class="sd">    >>> sqlTrans.transform(df).head()</span> | 
|  | <span class="sd">    Row(id=0, v1=1.0, v2=3.0, v3=4.0, v4=3.0)</span> | 
|  | <span class="sd">    >>> sqlTransformerPath = temp_path + "/sql-transformer"</span> | 
|  | <span class="sd">    >>> sqlTrans.save(sqlTransformerPath)</span> | 
|  | <span class="sd">    >>> loadedSqlTrans = SQLTransformer.load(sqlTransformerPath)</span> | 
|  | <span class="sd">    >>> loadedSqlTrans.getStatement() == sqlTrans.getStatement()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedSqlTrans.transform(df).take(1) == sqlTrans.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">statement</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> <span class="s2">"statement"</span><span class="p">,</span> <span class="s2">"SQL statement"</span><span class="p">,</span> <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">statement</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, statement=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">SQLTransformer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.SQLTransformer"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="SQLTransformer.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.SQLTransformer.html#pyspark.ml.feature.SQLTransformer.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">statement</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"SQLTransformer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, statement=None)</span> | 
|  | <span class="sd">        Sets params for this SQLTransformer.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="SQLTransformer.setStatement"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.SQLTransformer.html#pyspark.ml.feature.SQLTransformer.setStatement">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setStatement</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"SQLTransformer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`statement`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">statement</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="SQLTransformer.getStatement"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.SQLTransformer.html#pyspark.ml.feature.SQLTransformer.getStatement">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getStatement</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of statement or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">statement</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_StandardScalerParams</span><span class="p">(</span><span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`StandardScaler` and :py:class:`StandardScalerModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">withMean</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> <span class="s2">"withMean"</span><span class="p">,</span> <span class="s2">"Center data with mean"</span><span class="p">,</span> <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">withStd</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"withStd"</span><span class="p">,</span> | 
|  | <span class="s2">"Scale to unit standard deviation"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_StandardScalerParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">withMean</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">withStd</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getWithMean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of withMean or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">withMean</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getWithStd</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of withStd or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">withStd</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="StandardScaler"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StandardScaler.html#pyspark.ml.feature.StandardScaler">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">StandardScaler</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"StandardScalerModel"</span><span class="p">],</span> | 
|  | <span class="n">_StandardScalerParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"StandardScaler"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Standardizes features by removing the mean and scaling to unit variance using column summary</span> | 
|  | <span class="sd">    statistics on the samples in the training set.</span> | 
|  |  | 
|  | <span class="sd">    The "unit std" is computed using the `corrected sample standard deviation \</span> | 
|  | <span class="sd">    &lt;https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation&gt;`_,</span> | 
|  | <span class="sd">    which is computed as the square root of the unbiased sample variance.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(Vectors.dense([0.0]),), (Vectors.dense([2.0]),)], ["a"])</span> | 
|  | <span class="sd">    >>> standardScaler = StandardScaler()</span> | 
|  | <span class="sd">    >>> standardScaler.setInputCol("a")</span> | 
|  | <span class="sd">    StandardScaler...</span> | 
|  | <span class="sd">    >>> standardScaler.setOutputCol("scaled")</span> | 
|  | <span class="sd">    StandardScaler...</span> | 
|  | <span class="sd">    >>> model = standardScaler.fit(df)</span> | 
|  | <span class="sd">    >>> model.getInputCol()</span> | 
|  | <span class="sd">    'a'</span> | 
|  | <span class="sd">    >>> model.setOutputCol("output")</span> | 
|  | <span class="sd">    StandardScalerModel...</span> | 
|  | <span class="sd">    >>> model.mean</span> | 
|  | <span class="sd">    DenseVector([1.0])</span> | 
|  | <span class="sd">    >>> model.std</span> | 
|  | <span class="sd">    DenseVector([1.4142])</span> | 
|  | <span class="sd">    >>> model.transform(df).collect()[1].output</span> | 
|  | <span class="sd">    DenseVector([1.4142])</span> | 
|  | <span class="sd">    >>> standardScalerPath = temp_path + "/standard-scaler"</span> | 
|  | <span class="sd">    >>> standardScaler.save(standardScalerPath)</span> | 
|  | <span class="sd">    >>> loadedStandardScaler = StandardScaler.load(standardScalerPath)</span> | 
|  | <span class="sd">    >>> loadedStandardScaler.getWithMean() == standardScaler.getWithMean()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedStandardScaler.getWithStd() == standardScaler.getWithStd()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/standard-scaler-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = StandardScalerModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.std == model.std</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.mean == model.mean</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).take(1) == model.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">withMean</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">withStd</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, withMean=False, withStd=True, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">StandardScaler</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.StandardScaler"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="StandardScaler.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StandardScaler.html#pyspark.ml.feature.StandardScaler.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">withMean</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">withStd</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"StandardScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, withMean=False, withStd=True, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Sets params for this StandardScaler.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StandardScaler.setWithMean"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StandardScaler.html#pyspark.ml.feature.StandardScaler.setWithMean">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setWithMean</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StandardScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`withMean`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">withMean</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StandardScaler.setWithStd"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StandardScaler.html#pyspark.ml.feature.StandardScaler.setWithStd">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setWithStd</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StandardScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`withStd`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">withStd</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StandardScaler.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StandardScaler.html#pyspark.ml.feature.StandardScaler.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StandardScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StandardScaler.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StandardScaler.html#pyspark.ml.feature.StandardScaler.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StandardScaler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StandardScalerModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">StandardScalerModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="StandardScalerModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StandardScalerModel.html#pyspark.ml.feature.StandardScalerModel">[docs]</a><span class="k">class</span> <span class="nc">StandardScalerModel</span><span class="p">(</span> | 
|  | <span class="n">JavaModel</span><span class="p">,</span> | 
|  | <span class="n">_StandardScalerParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"StandardScalerModel"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`StandardScaler`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="StandardScalerModel.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StandardScalerModel.html#pyspark.ml.feature.StandardScalerModel.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StandardScalerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StandardScalerModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StandardScalerModel.html#pyspark.ml.feature.StandardScalerModel.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StandardScalerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Standard deviation of the StandardScalerModel.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"std"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Mean of the StandardScalerModel.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"mean"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_StringIndexerParams</span><span class="p">(</span> | 
|  | <span class="n">JavaParams</span><span class="p">,</span> <span class="n">HasHandleInvalid</span><span class="p">,</span> <span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">,</span> <span class="n">HasInputCols</span><span class="p">,</span> <span class="n">HasOutputCols</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`StringIndexer` and :py:class:`StringIndexerModel`.</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">stringOrderType</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"stringOrderType"</span><span class="p">,</span> | 
|  | <span class="s2">"How to order labels of string column. The first label after "</span> | 
|  | <span class="o">+</span> <span class="s2">"ordering is assigned an index of 0. Supported options: "</span> | 
|  | <span class="o">+</span> <span class="s2">"frequencyDesc, frequencyAsc, alphabetDesc, alphabetAsc. "</span> | 
|  | <span class="o">+</span> <span class="s2">"Default is frequencyDesc. In case of equal frequency when "</span> | 
|  | <span class="o">+</span> <span class="s2">"under frequencyDesc/Asc, the strings are further sorted "</span> | 
|  | <span class="o">+</span> <span class="s2">"alphabetically"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"handleInvalid"</span><span class="p">,</span> | 
|  | <span class="s2">"how to handle invalid data (unseen "</span> | 
|  | <span class="o">+</span> <span class="s2">"or NULL values) in features and label column of string type. "</span> | 
|  | <span class="o">+</span> <span class="s2">"Options are 'skip' (filter out rows with invalid data), "</span> | 
|  | <span class="o">+</span> <span class="s2">"error (throw an error), or 'keep' (put invalid data "</span> | 
|  | <span class="o">+</span> <span class="s2">"in a special additional bucket, at index numLabels)."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_StringIndexerParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="s2">"error"</span><span class="p">,</span> <span class="n">stringOrderType</span><span class="o">=</span><span class="s2">"frequencyDesc"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getStringOrderType</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`stringOrderType` or its default value 'frequencyDesc'.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">stringOrderType</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexer.html#pyspark.ml.feature.StringIndexer">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">StringIndexer</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"StringIndexerModel"</span><span class="p">],</span> | 
|  | <span class="n">_StringIndexerParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"StringIndexer"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    A label indexer that maps a string column of labels to an ML column of label indices.</span> | 
|  | <span class="sd">    If the input column is numeric, we cast it to string and index the string values.</span> | 
|  | <span class="sd">    The indices are in [0, numLabels). By default, this is ordered by label frequencies</span> | 
|  | <span class="sd">    so the most frequent label gets index 0. The ordering behavior is controlled by</span> | 
|  | <span class="sd">    setting :py:attr:`stringOrderType`. Its default value is 'frequencyDesc'.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed",</span> | 
|  | <span class="sd">    ...     stringOrderType="frequencyDesc")</span> | 
|  | <span class="sd">    >>> stringIndexer.setHandleInvalid("error")</span> | 
|  | <span class="sd">    StringIndexer...</span> | 
|  | <span class="sd">    >>> model = stringIndexer.fit(stringIndDf)</span> | 
|  | <span class="sd">    >>> model.setHandleInvalid("error")</span> | 
|  | <span class="sd">    StringIndexerModel...</span> | 
|  | <span class="sd">    >>> td = model.transform(stringIndDf)</span> | 
|  | <span class="sd">    >>> sorted(set([(i[0], i[1]) for i in td.select(td.id, td.indexed).collect()]),</span> | 
|  | <span class="sd">    ...     key=lambda x: x[0])</span> | 
|  | <span class="sd">    [(0, 0.0), (1, 2.0), (2, 1.0), (3, 0.0), (4, 0.0), (5, 1.0)]</span> | 
|  | <span class="sd">    >>> inverter = IndexToString(inputCol="indexed", outputCol="label2", labels=model.labels)</span> | 
|  | <span class="sd">    >>> itd = inverter.transform(td)</span> | 
|  | <span class="sd">    >>> sorted(set([(i[0], str(i[1])) for i in itd.select(itd.id, itd.label2).collect()]),</span> | 
|  | <span class="sd">    ...     key=lambda x: x[0])</span> | 
|  | <span class="sd">    [(0, 'a'), (1, 'b'), (2, 'c'), (3, 'a'), (4, 'a'), (5, 'c')]</span> | 
|  | <span class="sd">    >>> stringIndexerPath = temp_path + "/string-indexer"</span> | 
|  | <span class="sd">    >>> stringIndexer.save(stringIndexerPath)</span> | 
|  | <span class="sd">    >>> loadedIndexer = StringIndexer.load(stringIndexerPath)</span> | 
|  | <span class="sd">    >>> loadedIndexer.getHandleInvalid() == stringIndexer.getHandleInvalid()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/string-indexer-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = StringIndexerModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.labels == model.labels</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> indexToStringPath = temp_path + "/index-to-string"</span> | 
|  | <span class="sd">    >>> inverter.save(indexToStringPath)</span> | 
|  | <span class="sd">    >>> loadedInverter = IndexToString.load(indexToStringPath)</span> | 
|  | <span class="sd">    >>> loadedInverter.getLabels() == inverter.getLabels()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(stringIndDf).take(1) == model.transform(stringIndDf).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> stringIndexer.getStringOrderType()</span> | 
|  | <span class="sd">    'frequencyDesc'</span> | 
|  | <span class="sd">    >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed", handleInvalid="error",</span> | 
|  | <span class="sd">    ...     stringOrderType="alphabetDesc")</span> | 
|  | <span class="sd">    >>> model = stringIndexer.fit(stringIndDf)</span> | 
|  | <span class="sd">    >>> td = model.transform(stringIndDf)</span> | 
|  | <span class="sd">    >>> sorted(set([(i[0], i[1]) for i in td.select(td.id, td.indexed).collect()]),</span> | 
|  | <span class="sd">    ...     key=lambda x: x[0])</span> | 
|  | <span class="sd">    [(0, 2.0), (1, 1.0), (2, 0.0), (3, 2.0), (4, 2.0), (5, 0.0)]</span> | 
|  | <span class="sd">    >>> fromlabelsModel = StringIndexerModel.from_labels(["a", "b", "c"],</span> | 
|  | <span class="sd">    ...     inputCol="label", outputCol="indexed", handleInvalid="error")</span> | 
|  | <span class="sd">    >>> result = fromlabelsModel.transform(stringIndDf)</span> | 
|  | <span class="sd">    >>> sorted(set([(i[0], i[1]) for i in result.select(result.id, result.indexed).collect()]),</span> | 
|  | <span class="sd">    ...     key=lambda x: x[0])</span> | 
|  | <span class="sd">    [(0, 0.0), (1, 1.0), (2, 2.0), (3, 0.0), (4, 0.0), (5, 2.0)]</span> | 
|  | <span class="sd">    >>> testData = sc.parallelize([Row(id=0, label1="a", label2="e"),</span> | 
|  | <span class="sd">    ...                            Row(id=1, label1="b", label2="f"),</span> | 
|  | <span class="sd">    ...                            Row(id=2, label1="c", label2="e"),</span> | 
|  | <span class="sd">    ...                            Row(id=3, label1="a", label2="f"),</span> | 
|  | <span class="sd">    ...                            Row(id=4, label1="a", label2="f"),</span> | 
|  | <span class="sd">    ...                            Row(id=5, label1="c", label2="f")], 3)</span> | 
|  | <span class="sd">    >>> multiRowDf = spark.createDataFrame(testData)</span> | 
|  | <span class="sd">    >>> inputs = ["label1", "label2"]</span> | 
|  | <span class="sd">    >>> outputs = ["index1", "index2"]</span> | 
|  | <span class="sd">    >>> stringIndexer = StringIndexer(inputCols=inputs, outputCols=outputs)</span> | 
|  | <span class="sd">    >>> model = stringIndexer.fit(multiRowDf)</span> | 
|  | <span class="sd">    >>> result = model.transform(multiRowDf)</span> | 
|  | <span class="sd">    >>> sorted(set([(i[0], i[1], i[2]) for i in result.select(result.id, result.index1,</span> | 
|  | <span class="sd">    ...     result.index2).collect()]), key=lambda x: x[0])</span> | 
|  | <span class="sd">    [(0, 0.0, 1.0), (1, 2.0, 0.0), (2, 1.0, 1.0), (3, 0.0, 0.0), (4, 0.0, 0.0), (5, 1.0, 0.0)]</span> | 
|  | <span class="sd">    >>> fromlabelsModel = StringIndexerModel.from_arrays_of_labels([["a", "b", "c"], ["e", "f"]],</span> | 
|  | <span class="sd">    ...     inputCols=inputs, outputCols=outputs)</span> | 
|  | <span class="sd">    >>> result = fromlabelsModel.transform(multiRowDf)</span> | 
|  | <span class="sd">    >>> sorted(set([(i[0], i[1], i[2]) for i in result.select(result.id, result.index1,</span> | 
|  | <span class="sd">    ...     result.index2).collect()]), key=lambda x: x[0])</span> | 
|  | <span class="sd">    [(0, 0.0, 0.0), (1, 1.0, 1.0), (2, 2.0, 0.0), (3, 0.0, 1.0), (4, 0.0, 1.0), (5, 2.0, 1.0)]</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">stringOrderType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">stringOrderType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="n">stringOrderType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"frequencyDesc"</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, inputCol=None, outputCol=None, inputCols=None, outputCols=None, \</span> | 
|  | <span class="sd">                 handleInvalid="error", stringOrderType="frequencyDesc")</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">StringIndexer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.StringIndexer"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">stringOrderType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"StringIndexer"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">stringOrderType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"StringIndexer"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexer.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexer.html#pyspark.ml.feature.StringIndexer.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="n">stringOrderType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"frequencyDesc"</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"StringIndexer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, inputCol=None, outputCol=None, inputCols=None, outputCols=None, \</span> | 
|  | <span class="sd">                  handleInvalid="error", stringOrderType="frequencyDesc")</span> | 
|  | <span class="sd">        Sets params for this StringIndexer.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StringIndexerModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">StringIndexerModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexer.setStringOrderType"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexer.html#pyspark.ml.feature.StringIndexer.setStringOrderType">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setStringOrderType</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StringIndexer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`stringOrderType`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">stringOrderType</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexer.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexer.html#pyspark.ml.feature.StringIndexer.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StringIndexer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexer.setInputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexer.html#pyspark.ml.feature.StringIndexer.setInputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"StringIndexer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexer.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexer.html#pyspark.ml.feature.StringIndexer.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StringIndexer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexer.setOutputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexer.html#pyspark.ml.feature.StringIndexer.setOutputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"StringIndexer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexer.setHandleInvalid"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexer.html#pyspark.ml.feature.StringIndexer.setHandleInvalid">[docs]</a>    <span class="k">def</span> <span class="nf">setHandleInvalid</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StringIndexer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`handleInvalid`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexerModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexerModel.html#pyspark.ml.feature.StringIndexerModel">[docs]</a><span class="k">class</span> <span class="nc">StringIndexerModel</span><span class="p">(</span> | 
|  | <span class="n">JavaModel</span><span class="p">,</span> <span class="n">_StringIndexerParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"StringIndexerModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`StringIndexer`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexerModel.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexerModel.html#pyspark.ml.feature.StringIndexerModel.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StringIndexerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexerModel.setInputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexerModel.html#pyspark.ml.feature.StringIndexerModel.setInputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"StringIndexerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexerModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexerModel.html#pyspark.ml.feature.StringIndexerModel.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StringIndexerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexerModel.setOutputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexerModel.html#pyspark.ml.feature.StringIndexerModel.setOutputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"StringIndexerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexerModel.setHandleInvalid"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexerModel.html#pyspark.ml.feature.StringIndexerModel.setHandleInvalid">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setHandleInvalid</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StringIndexerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`handleInvalid`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexerModel.from_labels"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexerModel.html#pyspark.ml.feature.StringIndexerModel.from_labels">[docs]</a>    <span class="nd">@classmethod</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">from_labels</span><span class="p">(</span> | 
|  | <span class="bp">cls</span><span class="p">,</span> | 
|  | <span class="n">labels</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"StringIndexerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Construct the model directly from an array of label strings.</span> | 
|  | <span class="sd">        Requires an active SparkContext.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.core.context</span> <span class="kn">import</span> <span class="n">SparkContext</span> | 
|  |  | 
|  | <span class="n">sc</span> <span class="o">=</span> <span class="n">SparkContext</span><span class="o">.</span><span class="n">_active_spark_context</span> | 
|  | <span class="k">assert</span> <span class="n">sc</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">sc</span><span class="o">.</span><span class="n">_gateway</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> | 
|  | <span class="n">java_class</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_gateway</span><span class="o">.</span><span class="n">jvm</span><span class="o">.</span><span class="n">java</span><span class="o">.</span><span class="n">lang</span><span class="o">.</span><span class="n">String</span> | 
|  | <span class="n">jlabels</span> <span class="o">=</span> <span class="n">StringIndexerModel</span><span class="o">.</span><span class="n">_new_java_array</span><span class="p">(</span><span class="n">labels</span><span class="p">,</span> <span class="n">java_class</span><span class="p">)</span> | 
|  | <span class="n">model</span> <span class="o">=</span> <span class="n">StringIndexerModel</span><span class="o">.</span><span class="n">_create_from_java_class</span><span class="p">(</span> | 
|  | <span class="s2">"org.apache.spark.ml.feature.StringIndexerModel"</span><span class="p">,</span> <span class="n">jlabels</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">model</span><span class="o">.</span><span class="n">setInputCol</span><span class="p">(</span><span class="n">inputCol</span><span class="p">)</span> | 
|  | <span class="k">if</span> <span class="n">outputCol</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">model</span><span class="o">.</span><span class="n">setOutputCol</span><span class="p">(</span><span class="n">outputCol</span><span class="p">)</span> | 
|  | <span class="k">if</span> <span class="n">handleInvalid</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">model</span><span class="o">.</span><span class="n">setHandleInvalid</span><span class="p">(</span><span class="n">handleInvalid</span><span class="p">)</span> | 
|  | <span class="k">return</span> <span class="n">model</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StringIndexerModel.from_arrays_of_labels"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StringIndexerModel.html#pyspark.ml.feature.StringIndexerModel.from_arrays_of_labels">[docs]</a>    <span class="nd">@classmethod</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">from_arrays_of_labels</span><span class="p">(</span> | 
|  | <span class="bp">cls</span><span class="p">,</span> | 
|  | <span class="n">arrayOfLabels</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]],</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"StringIndexerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Construct the model directly from an array of arrays of label strings.</span> | 
|  | <span class="sd">        Requires an active SparkContext.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.core.context</span> <span class="kn">import</span> <span class="n">SparkContext</span> | 
|  |  | 
|  | <span class="n">sc</span> <span class="o">=</span> <span class="n">SparkContext</span><span class="o">.</span><span class="n">_active_spark_context</span> | 
|  | <span class="k">assert</span> <span class="n">sc</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">sc</span><span class="o">.</span><span class="n">_gateway</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> | 
|  | <span class="n">java_class</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_gateway</span><span class="o">.</span><span class="n">jvm</span><span class="o">.</span><span class="n">java</span><span class="o">.</span><span class="n">lang</span><span class="o">.</span><span class="n">String</span> | 
|  | <span class="n">jlabels</span> <span class="o">=</span> <span class="n">StringIndexerModel</span><span class="o">.</span><span class="n">_new_java_array</span><span class="p">(</span><span class="n">arrayOfLabels</span><span class="p">,</span> <span class="n">java_class</span><span class="p">)</span> | 
|  | <span class="n">model</span> <span class="o">=</span> <span class="n">StringIndexerModel</span><span class="o">.</span><span class="n">_create_from_java_class</span><span class="p">(</span> | 
|  | <span class="s2">"org.apache.spark.ml.feature.StringIndexerModel"</span><span class="p">,</span> <span class="n">jlabels</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">model</span><span class="o">.</span><span class="n">setInputCols</span><span class="p">(</span><span class="n">inputCols</span><span class="p">)</span> | 
|  | <span class="k">if</span> <span class="n">outputCols</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">model</span><span class="o">.</span><span class="n">setOutputCols</span><span class="p">(</span><span class="n">outputCols</span><span class="p">)</span> | 
|  | <span class="k">if</span> <span class="n">handleInvalid</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span> | 
|  | <span class="n">model</span><span class="o">.</span><span class="n">setHandleInvalid</span><span class="p">(</span><span class="n">handleInvalid</span><span class="p">)</span> | 
|  | <span class="k">return</span> <span class="n">model</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">labels</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Ordered list of labels, corresponding to indices to be assigned.</span> | 
|  |  | 
|  | <span class="sd">        .. deprecated:: 3.1.0</span> | 
|  | <span class="sd">            It will be removed in future versions. Use the `labelsArray` property instead.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"labels"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.2"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">labelsArray</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Array of ordered lists of labels, corresponding to indices to be assigned</span> | 
|  | <span class="sd">        for each input column.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"labelsArray"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="IndexToString"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IndexToString.html#pyspark.ml.feature.IndexToString">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">IndexToString</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"IndexToString"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    A :py:class:`pyspark.ml.base.Transformer` that maps a column of indices back to a new column of</span> | 
|  | <span class="sd">    corresponding string values.</span> | 
|  | <span class="sd">    The index-string mapping is either from the ML attributes of the input column,</span> | 
|  | <span class="sd">    or from user-supplied labels (which take precedence over ML attributes).</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.6.0</span> | 
|  |  | 
|  | <span class="sd">    See Also</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    StringIndexer : for converting categorical values into category indices</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">labels</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"labels"</span><span class="p">,</span> | 
|  | <span class="s2">"Optional array of labels specifying index-string mapping."</span> | 
|  | <span class="o">+</span> <span class="s2">" If not provided or if empty, then metadata from inputCol is used instead."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toListString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">labels</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, inputCol=None, outputCol=None, labels=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">IndexToString</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.IndexToString"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="IndexToString.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IndexToString.html#pyspark.ml.feature.IndexToString.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">labels</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"IndexToString"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, inputCol=None, outputCol=None, labels=None)</span> | 
|  | <span class="sd">        Sets params for this IndexToString.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="IndexToString.setLabels"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IndexToString.html#pyspark.ml.feature.IndexToString.setLabels">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setLabels</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"IndexToString"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`labels`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">labels</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="IndexToString.getLabels"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IndexToString.html#pyspark.ml.feature.IndexToString.getLabels">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getLabels</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`labels` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="IndexToString.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IndexToString.html#pyspark.ml.feature.IndexToString.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IndexToString"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="IndexToString.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.IndexToString.html#pyspark.ml.feature.IndexToString.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IndexToString"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="StopWordsRemover"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StopWordsRemover.html#pyspark.ml.feature.StopWordsRemover">[docs]</a><span class="k">class</span> <span class="nc">StopWordsRemover</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">HasInputCols</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCols</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"StopWordsRemover"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    A feature transformer that filters out stop words from input.</span> | 
|  | <span class="sd">    Since 3.0.0, :py:class:`StopWordsRemover` can filter stop words out of multiple columns at once by setting</span> | 
|  | <span class="sd">    the :py:attr:`inputCols` parameter. Note that when both the :py:attr:`inputCol` and</span> | 
|  | <span class="sd">    :py:attr:`inputCols` parameters are set, an Exception will be thrown.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.6.0</span> | 
|  |  | 
|  | <span class="sd">    Notes</span> | 
|  | <span class="sd">    -----</span> | 
|  | <span class="sd">    Null values in the input array are preserved unless null is explicitly added to stopWords.</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(["a", "b", "c"],)], ["text"])</span> | 
|  | <span class="sd">    >>> remover = StopWordsRemover(stopWords=["b"])</span> | 
|  | <span class="sd">    >>> remover.setInputCol("text")</span> | 
|  | <span class="sd">    StopWordsRemover...</span> | 
|  | <span class="sd">    >>> remover.setOutputCol("words")</span> | 
|  | <span class="sd">    StopWordsRemover...</span> | 
|  | <span class="sd">    >>> remover.transform(df).head().words == ['a', 'c']</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> stopWordsRemoverPath = temp_path + "/stopwords-remover"</span> | 
|  | <span class="sd">    >>> remover.save(stopWordsRemoverPath)</span> | 
|  | <span class="sd">    >>> loadedRemover = StopWordsRemover.load(stopWordsRemoverPath)</span> | 
|  | <span class="sd">    >>> loadedRemover.getStopWords() == remover.getStopWords()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedRemover.getCaseSensitive() == remover.getCaseSensitive()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedRemover.transform(df).take(1) == remover.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> df2 = spark.createDataFrame([(["a", "b", "c"], ["a", "b"])], ["text1", "text2"])</span> | 
|  | <span class="sd">    >>> remover2 = StopWordsRemover(stopWords=["b"])</span> | 
|  | <span class="sd">    >>> remover2.setInputCols(["text1", "text2"]).setOutputCols(["words1", "words2"])</span> | 
|  | <span class="sd">    StopWordsRemover...</span> | 
|  | <span class="sd">    >>> remover2.transform(df2).show()</span> | 
|  | <span class="sd">    +---------+------+------+------+</span> | 
|  | <span class="sd">    |    text1| text2|words1|words2|</span> | 
|  | <span class="sd">    +---------+------+------+------+</span> | 
|  | <span class="sd">    |[a, b, c]|[a, b]|[a, c]|   [a]|</span> | 
|  | <span class="sd">    +---------+------+------+------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">stopWords</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"stopWords"</span><span class="p">,</span> | 
|  | <span class="s2">"The words to be filtered out"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toListString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">caseSensitive</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"caseSensitive"</span><span class="p">,</span> | 
|  | <span class="s2">"whether to do a case sensitive "</span> <span class="o">+</span> <span class="s2">"comparison over the stop words"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">locale</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"locale"</span><span class="p">,</span> | 
|  | <span class="s2">"locale of the input. ignored when case sensitive "</span> <span class="o">+</span> <span class="s2">"is true"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">stopWords</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">caseSensitive</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">locale</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">stopWords</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">caseSensitive</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">locale</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">stopWords</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">caseSensitive</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">locale</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, inputCol=None, outputCol=None, stopWords=None, caseSensitive=False, \</span> | 
|  | <span class="sd">                 locale=None, inputCols=None, outputCols=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">StopWordsRemover</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> | 
|  | <span class="s2">"org.apache.spark.ml.feature.StopWordsRemover"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> | 
|  | <span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> | 
|  | <span class="n">stopWords</span><span class="o">=</span><span class="n">StopWordsRemover</span><span class="o">.</span><span class="n">loadDefaultStopWords</span><span class="p">(</span><span class="s2">"english"</span><span class="p">),</span> | 
|  | <span class="n">caseSensitive</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">locale</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span><span class="o">.</span><span class="n">getLocale</span><span class="p">(),</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">stopWords</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">caseSensitive</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">locale</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"StopWordsRemover"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <span class="nd">@overload</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">stopWords</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">caseSensitive</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">locale</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="o">...</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"StopWordsRemover"</span><span class="p">:</span> | 
|  | <span class="o">...</span> | 
|  |  | 
|  | <div class="viewcode-block" id="StopWordsRemover.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StopWordsRemover.html#pyspark.ml.feature.StopWordsRemover.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">stopWords</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">caseSensitive</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">locale</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"StopWordsRemover"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, inputCol=None, outputCol=None, stopWords=None, caseSensitive=False, \</span> | 
|  | <span class="sd">                  locale=None, inputCols=None, outputCols=None)</span> | 
|  | <span class="sd">        Sets params for this StopWordsRemover.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StopWordsRemover.setStopWords"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StopWordsRemover.html#pyspark.ml.feature.StopWordsRemover.setStopWords">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setStopWords</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"StopWordsRemover"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`stopWords`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">stopWords</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StopWordsRemover.getStopWords"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StopWordsRemover.html#pyspark.ml.feature.StopWordsRemover.getStopWords">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getStopWords</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`stopWords` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">stopWords</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StopWordsRemover.setCaseSensitive"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StopWordsRemover.html#pyspark.ml.feature.StopWordsRemover.setCaseSensitive">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setCaseSensitive</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StopWordsRemover"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`caseSensitive`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">caseSensitive</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StopWordsRemover.getCaseSensitive"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StopWordsRemover.html#pyspark.ml.feature.StopWordsRemover.getCaseSensitive">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getCaseSensitive</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`caseSensitive` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">caseSensitive</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StopWordsRemover.setLocale"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StopWordsRemover.html#pyspark.ml.feature.StopWordsRemover.setLocale">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setLocale</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StopWordsRemover"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`locale`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">locale</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StopWordsRemover.getLocale"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StopWordsRemover.html#pyspark.ml.feature.StopWordsRemover.getLocale">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getLocale</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`locale` or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">locale</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StopWordsRemover.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StopWordsRemover.html#pyspark.ml.feature.StopWordsRemover.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StopWordsRemover"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StopWordsRemover.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StopWordsRemover.html#pyspark.ml.feature.StopWordsRemover.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"StopWordsRemover"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StopWordsRemover.setInputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StopWordsRemover.html#pyspark.ml.feature.StopWordsRemover.setInputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"StopWordsRemover"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StopWordsRemover.setOutputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StopWordsRemover.html#pyspark.ml.feature.StopWordsRemover.setOutputCols">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"StopWordsRemover"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="StopWordsRemover.loadDefaultStopWords"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.StopWordsRemover.html#pyspark.ml.feature.StopWordsRemover.loadDefaultStopWords">[docs]</a>    <span class="nd">@staticmethod</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">loadDefaultStopWords</span><span class="p">(</span><span class="n">language</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Loads the default stop words for the given language.</span> | 
|  | <span class="sd">        Supported languages: danish, dutch, english, finnish, french, german, hungarian,</span> | 
|  | <span class="sd">        italian, norwegian, portuguese, russian, spanish, swedish, turkish</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">stopWordsObj</span> <span class="o">=</span> <span class="n">_jvm</span><span class="p">()</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">ml</span><span class="o">.</span><span class="n">feature</span><span class="o">.</span><span class="n">StopWordsRemover</span> | 
|  | <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="n">stopWordsObj</span><span class="o">.</span><span class="n">loadDefaultStopWords</span><span class="p">(</span><span class="n">language</span><span class="p">))</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="Tokenizer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Tokenizer.html#pyspark.ml.feature.Tokenizer">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">Tokenizer</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"Tokenizer"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    A tokenizer that converts the input string to lowercase and then</span> | 
|  | <span class="sd">    splits it by white spaces.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.3.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([("a b c",)], ["text"])</span> | 
|  | <span class="sd">    >>> tokenizer = Tokenizer(outputCol="words")</span> | 
|  | <span class="sd">    >>> tokenizer.setInputCol("text")</span> | 
|  | <span class="sd">    Tokenizer...</span> | 
|  | <span class="sd">    >>> tokenizer.transform(df).head()</span> | 
|  | <span class="sd">    Row(text='a b c', words=['a', 'b', 'c'])</span> | 
|  | <span class="sd">    >>> # Change a parameter.</span> | 
|  | <span class="sd">    >>> tokenizer.setParams(outputCol="tokens").transform(df).head()</span> | 
|  | <span class="sd">    Row(text='a b c', tokens=['a', 'b', 'c'])</span> | 
|  | <span class="sd">    >>> # Temporarily modify a parameter.</span> | 
|  | <span class="sd">    >>> tokenizer.transform(df, {tokenizer.outputCol: "words"}).head()</span> | 
|  | <span class="sd">    Row(text='a b c', words=['a', 'b', 'c'])</span> | 
|  | <span class="sd">    >>> tokenizer.transform(df).head()</span> | 
|  | <span class="sd">    Row(text='a b c', tokens=['a', 'b', 'c'])</span> | 
|  | <span class="sd">    >>> # Must use keyword arguments to specify params.</span> | 
|  | <span class="sd">    >>> tokenizer.setParams("text")</span> | 
|  | <span class="sd">    Traceback (most recent call last):</span> | 
|  | <span class="sd">        ...</span> | 
|  | <span class="sd">    TypeError: Method setParams forces keyword arguments.</span> | 
|  | <span class="sd">    >>> tokenizerPath = temp_path + "/tokenizer"</span> | 
|  | <span class="sd">    >>> tokenizer.save(tokenizerPath)</span> | 
|  | <span class="sd">    >>> loadedTokenizer = Tokenizer.load(tokenizerPath)</span> | 
|  | <span class="sd">    >>> loadedTokenizer.transform(df).head().tokens == tokenizer.transform(df).head().tokens</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">Tokenizer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.Tokenizer"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="Tokenizer.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Tokenizer.html#pyspark.ml.feature.Tokenizer.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Tokenizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Sets params for this Tokenizer.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Tokenizer.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Tokenizer.html#pyspark.ml.feature.Tokenizer.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Tokenizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Tokenizer.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Tokenizer.html#pyspark.ml.feature.Tokenizer.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Tokenizer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="VectorAssembler"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorAssembler.html#pyspark.ml.feature.VectorAssembler">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">VectorAssembler</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCols</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">HasHandleInvalid</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"VectorAssembler"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    A feature transformer that merges multiple columns into a vector column.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(1, 0, 3)], ["a", "b", "c"])</span> | 
|  | <span class="sd">    >>> vecAssembler = VectorAssembler(outputCol="features")</span> | 
|  | <span class="sd">    >>> vecAssembler.setInputCols(["a", "b", "c"])</span> | 
|  | <span class="sd">    VectorAssembler...</span> | 
|  | <span class="sd">    >>> vecAssembler.transform(df).head().features</span> | 
|  | <span class="sd">    DenseVector([1.0, 0.0, 3.0])</span> | 
|  | <span class="sd">    >>> vecAssembler.setParams(outputCol="freqs").transform(df).head().freqs</span> | 
|  | <span class="sd">    DenseVector([1.0, 0.0, 3.0])</span> | 
|  | <span class="sd">    >>> params = {vecAssembler.inputCols: ["b", "a"], vecAssembler.outputCol: "vector"}</span> | 
|  | <span class="sd">    >>> vecAssembler.transform(df, params).head().vector</span> | 
|  | <span class="sd">    DenseVector([0.0, 1.0])</span> | 
|  | <span class="sd">    >>> vectorAssemblerPath = temp_path + "/vector-assembler"</span> | 
|  | <span class="sd">    >>> vecAssembler.save(vectorAssemblerPath)</span> | 
|  | <span class="sd">    >>> loadedAssembler = VectorAssembler.load(vectorAssemblerPath)</span> | 
|  | <span class="sd">    >>> loadedAssembler.transform(df).head().freqs == vecAssembler.transform(df).head().freqs</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> dfWithNullsAndNaNs = spark.createDataFrame(</span> | 
|  | <span class="sd">    ...    [(1.0, 2.0, None), (3.0, float("nan"), 4.0), (5.0, 6.0, 7.0)], ["a", "b", "c"])</span> | 
|  | <span class="sd">    >>> vecAssembler2 = VectorAssembler(inputCols=["a", "b", "c"], outputCol="features",</span> | 
|  | <span class="sd">    ...    handleInvalid="keep")</span> | 
|  | <span class="sd">    >>> vecAssembler2.transform(dfWithNullsAndNaNs).show()</span> | 
|  | <span class="sd">    +---+---+----+-------------+</span> | 
|  | <span class="sd">    |  a|  b|   c|     features|</span> | 
|  | <span class="sd">    +---+---+----+-------------+</span> | 
|  | <span class="sd">    |1.0|2.0|NULL|[1.0,2.0,NaN]|</span> | 
|  | <span class="sd">    |3.0|NaN| 4.0|[3.0,NaN,4.0]|</span> | 
|  | <span class="sd">    |5.0|6.0| 7.0|[5.0,6.0,7.0]|</span> | 
|  | <span class="sd">    +---+---+----+-------------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> vecAssembler2.setParams(handleInvalid="skip").transform(dfWithNullsAndNaNs).show()</span> | 
|  | <span class="sd">    +---+---+---+-------------+</span> | 
|  | <span class="sd">    |  a|  b|  c|     features|</span> | 
|  | <span class="sd">    +---+---+---+-------------+</span> | 
|  | <span class="sd">    |5.0|6.0|7.0|[5.0,6.0,7.0]|</span> | 
|  | <span class="sd">    +---+---+---+-------------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"handleInvalid"</span><span class="p">,</span> | 
|  | <span class="s2">"How to handle invalid data (NULL "</span> | 
|  | <span class="o">+</span> <span class="s2">"and NaN values). Options are 'skip' (filter out rows with invalid "</span> | 
|  | <span class="o">+</span> <span class="s2">"data), 'error' (throw an error), or 'keep' (return relevant number "</span> | 
|  | <span class="o">+</span> <span class="s2">"of NaN in the output). Column lengths are taken from the size of ML "</span> | 
|  | <span class="o">+</span> <span class="s2">"Attribute Group, which can be set using `VectorSizeHint` in a "</span> | 
|  | <span class="o">+</span> <span class="s2">"pipeline before `VectorAssembler`. Column lengths can also be "</span> | 
|  | <span class="o">+</span> <span class="s2">"inferred from first rows of the data since it is safe to do so but "</span> | 
|  | <span class="o">+</span> <span class="s2">"only in case of 'error' or 'skip')."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, inputCols=None, outputCol=None, handleInvalid="error")</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">VectorAssembler</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.VectorAssembler"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="s2">"error"</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorAssembler.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorAssembler.html#pyspark.ml.feature.VectorAssembler.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorAssembler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, inputCols=None, outputCol=None, handleInvalid="error")</span> | 
|  | <span class="sd">        Sets params for this VectorAssembler.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorAssembler.setInputCols"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorAssembler.html#pyspark.ml.feature.VectorAssembler.setInputCols">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCols</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"VectorAssembler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCols`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCols</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorAssembler.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorAssembler.html#pyspark.ml.feature.VectorAssembler.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorAssembler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorAssembler.setHandleInvalid"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorAssembler.html#pyspark.ml.feature.VectorAssembler.setHandleInvalid">[docs]</a>    <span class="k">def</span> <span class="nf">setHandleInvalid</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorAssembler"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`handleInvalid`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_VectorIndexerParams</span><span class="p">(</span><span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">,</span> <span class="n">HasHandleInvalid</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`VectorIndexer` and :py:class:`VectorIndexerModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">maxCategories</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"maxCategories"</span><span class="p">,</span> | 
|  | <span class="s2">"Threshold for the number of values a categorical feature can take "</span> | 
|  | <span class="o">+</span> <span class="s2">"(>= 2). If a feature is found to have > maxCategories values, then "</span> | 
|  | <span class="o">+</span> <span class="s2">"it is declared continuous."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"handleInvalid"</span><span class="p">,</span> | 
|  | <span class="s2">"How to handle invalid data "</span> | 
|  | <span class="o">+</span> <span class="s2">"(unseen labels or NULL values). Options are 'skip' (filter out "</span> | 
|  | <span class="o">+</span> <span class="s2">"rows with invalid data), 'error' (throw an error), or 'keep' (put "</span> | 
|  | <span class="o">+</span> <span class="s2">"invalid data in a special additional bucket, at index of the number "</span> | 
|  | <span class="o">+</span> <span class="s2">"of categories of the feature)."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_VectorIndexerParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">maxCategories</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">handleInvalid</span><span class="o">=</span><span class="s2">"error"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getMaxCategories</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of maxCategories or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">maxCategories</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="VectorIndexer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorIndexer.html#pyspark.ml.feature.VectorIndexer">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">VectorIndexer</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"VectorIndexerModel"</span><span class="p">],</span> | 
|  | <span class="n">_VectorIndexerParams</span><span class="p">,</span> | 
|  | <span class="n">HasHandleInvalid</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"VectorIndexer"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Class for indexing categorical feature columns in a dataset of `Vector`.</span> | 
|  |  | 
|  | <span class="sd">    This has 2 usage modes:</span> | 
|  | <span class="sd">      - Automatically identify categorical features (default behavior)</span> | 
|  | <span class="sd">         - This helps process a dataset of unknown vectors into a dataset with some continuous</span> | 
|  | <span class="sd">           features and some categorical features. The choice between continuous and categorical</span> | 
|  | <span class="sd">           is based upon a maxCategories parameter.</span> | 
|  | <span class="sd">         - Set maxCategories to the maximum number of categories any categorical feature should</span> | 
|  | <span class="sd">           have.</span> | 
|  | <span class="sd">         - E.g.: Feature 0 has unique values {-1.0, 0.0}, and feature 1 values {1.0, 3.0, 5.0}.</span> | 
|  | <span class="sd">           If maxCategories = 2, then feature 0 will be declared categorical and use indices {0, 1},</span> | 
|  | <span class="sd">           and feature 1 will be declared continuous.</span> | 
|  | <span class="sd">      - Index all features, if all features are categorical</span> | 
|  | <span class="sd">         - If maxCategories is set to be very large, then this will build an index of unique</span> | 
|  | <span class="sd">           values for all features.</span> | 
|  | <span class="sd">         - Warning: This can cause problems if features are continuous since this will collect ALL</span> | 
|  | <span class="sd">           unique values to the driver.</span> | 
|  | <span class="sd">         - E.g.: Feature 0 has unique values {-1.0, 0.0}, and feature 1 values {1.0, 3.0, 5.0}.</span> | 
|  | <span class="sd">           If maxCategories >= 3, then both features will be declared categorical.</span> | 
|  |  | 
|  | <span class="sd">    This returns a model which can transform categorical features to use 0-based indices.</span> | 
|  |  | 
|  | <span class="sd">    Index stability:</span> | 
|  | <span class="sd">      - This is not guaranteed to choose the same category index across multiple runs.</span> | 
|  | <span class="sd">      - If a categorical feature includes value 0, then this is guaranteed to map value 0 to</span> | 
|  | <span class="sd">        index 0. This maintains vector sparsity.</span> | 
|  | <span class="sd">      - More stability may be added in the future.</span> | 
|  |  | 
|  | <span class="sd">    TODO: Future extensions: The following functionality is planned for the future:</span> | 
|  | <span class="sd">      - Preserve metadata in transform; if a feature's metadata is already present,</span> | 
|  | <span class="sd">        do not recompute.</span> | 
|  | <span class="sd">      - Specify certain features to not index, either via a parameter or via existing metadata.</span> | 
|  | <span class="sd">      - Add warning if a categorical feature has only 1 category.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([(Vectors.dense([-1.0, 0.0]),),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([0.0, 1.0]),), (Vectors.dense([0.0, 2.0]),)], ["a"])</span> | 
|  | <span class="sd">    >>> indexer = VectorIndexer(maxCategories=2, inputCol="a")</span> | 
|  | <span class="sd">    >>> indexer.setOutputCol("indexed")</span> | 
|  | <span class="sd">    VectorIndexer...</span> | 
|  | <span class="sd">    >>> model = indexer.fit(df)</span> | 
|  | <span class="sd">    >>> indexer.getHandleInvalid()</span> | 
|  | <span class="sd">    'error'</span> | 
|  | <span class="sd">    >>> model.setOutputCol("output")</span> | 
|  | <span class="sd">    VectorIndexerModel...</span> | 
|  | <span class="sd">    >>> model.transform(df).head().output</span> | 
|  | <span class="sd">    DenseVector([1.0, 0.0])</span> | 
|  | <span class="sd">    >>> model.numFeatures</span> | 
|  | <span class="sd">    2</span> | 
|  | <span class="sd">    >>> model.categoryMaps</span> | 
|  | <span class="sd">    {0: {0.0: 0, -1.0: 1}}</span> | 
|  | <span class="sd">    >>> indexer.setParams(outputCol="test").fit(df).transform(df).collect()[1].test</span> | 
|  | <span class="sd">    DenseVector([0.0, 1.0])</span> | 
|  | <span class="sd">    >>> params = {indexer.maxCategories: 3, indexer.outputCol: "vector"}</span> | 
|  | <span class="sd">    >>> model2 = indexer.fit(df, params)</span> | 
|  | <span class="sd">    >>> model2.transform(df).head().vector</span> | 
|  | <span class="sd">    DenseVector([1.0, 0.0])</span> | 
|  | <span class="sd">    >>> vectorIndexerPath = temp_path + "/vector-indexer"</span> | 
|  | <span class="sd">    >>> indexer.save(vectorIndexerPath)</span> | 
|  | <span class="sd">    >>> loadedIndexer = VectorIndexer.load(vectorIndexerPath)</span> | 
|  | <span class="sd">    >>> loadedIndexer.getMaxCategories() == indexer.getMaxCategories()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/vector-indexer-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = VectorIndexerModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.numFeatures == model.numFeatures</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.categoryMaps == model.categoryMaps</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).take(1) == model.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> dfWithInvalid = spark.createDataFrame([(Vectors.dense([3.0, 1.0]),)], ["a"])</span> | 
|  | <span class="sd">    >>> indexer.getHandleInvalid()</span> | 
|  | <span class="sd">    'error'</span> | 
|  | <span class="sd">    >>> model3 = indexer.setHandleInvalid("skip").fit(df)</span> | 
|  | <span class="sd">    >>> model3.transform(dfWithInvalid).count()</span> | 
|  | <span class="sd">    0</span> | 
|  | <span class="sd">    >>> model4 = indexer.setParams(handleInvalid="keep", outputCol="indexed").fit(df)</span> | 
|  | <span class="sd">    >>> model4.transform(dfWithInvalid).head().indexed</span> | 
|  | <span class="sd">    DenseVector([2.0, 1.0])</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">maxCategories</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, maxCategories=20, inputCol=None, outputCol=None, handleInvalid="error")</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">VectorIndexer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.VectorIndexer"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorIndexer.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorIndexer.html#pyspark.ml.feature.VectorIndexer.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">maxCategories</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorIndexer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, maxCategories=20, inputCol=None, outputCol=None, handleInvalid="error")</span> | 
|  | <span class="sd">        Sets params for this VectorIndexer.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorIndexer.setMaxCategories"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorIndexer.html#pyspark.ml.feature.VectorIndexer.setMaxCategories">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMaxCategories</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorIndexer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`maxCategories`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxCategories</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorIndexer.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorIndexer.html#pyspark.ml.feature.VectorIndexer.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorIndexer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorIndexer.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorIndexer.html#pyspark.ml.feature.VectorIndexer.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorIndexer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorIndexer.setHandleInvalid"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorIndexer.html#pyspark.ml.feature.VectorIndexer.setHandleInvalid">[docs]</a>    <span class="k">def</span> <span class="nf">setHandleInvalid</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorIndexer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`handleInvalid`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorIndexerModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">VectorIndexerModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="VectorIndexerModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorIndexerModel.html#pyspark.ml.feature.VectorIndexerModel">[docs]</a><span class="k">class</span> <span class="nc">VectorIndexerModel</span><span class="p">(</span> | 
|  | <span class="n">JavaModel</span><span class="p">,</span> <span class="n">_VectorIndexerParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"VectorIndexerModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`VectorIndexer`.</span> | 
|  |  | 
|  | <span class="sd">    Transform categorical features to use 0-based indices instead of their original values.</span> | 
|  | <span class="sd">      - Categorical features are mapped to indices.</span> | 
|  | <span class="sd">      - Continuous features (columns) are left unchanged.</span> | 
|  |  | 
|  | <span class="sd">    This also appends metadata to the output column, marking features as Numeric (continuous),</span> | 
|  | <span class="sd">    Nominal (categorical), or Binary (either continuous or categorical).</span> | 
|  | <span class="sd">    Non-ML metadata is not carried over from the input to the output column.</span> | 
|  |  | 
|  | <span class="sd">    This maintains vector sparsity.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorIndexerModel.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorIndexerModel.html#pyspark.ml.feature.VectorIndexerModel.setInputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorIndexerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorIndexerModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorIndexerModel.html#pyspark.ml.feature.VectorIndexerModel.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorIndexerModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">numFeatures</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Number of features, i.e., length of Vectors which this transforms.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"numFeatures"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">categoryMaps</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">]]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Feature value index.  Keys are categorical feature indices (column indices).</span> | 
|  | <span class="sd">        Values are maps from original feature values to 0-based category indices.</span> | 
|  | <span class="sd">        If a feature is not in this map, it is treated as continuous.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"javaCategoryMaps"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="VectorSlicer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSlicer.html#pyspark.ml.feature.VectorSlicer">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">VectorSlicer</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasOutputCol</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"VectorSlicer"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    This class takes a feature vector and outputs a new feature vector with a subarray</span> | 
|  | <span class="sd">    of the original features.</span> | 
|  |  | 
|  | <span class="sd">    The subset of features can be specified with either indices (`setIndices()`)</span> | 
|  | <span class="sd">    or names (`setNames()`).  At least one feature must be selected. Duplicate features</span> | 
|  | <span class="sd">    are not allowed, so there can be no overlap between selected indices and names.</span> | 
|  |  | 
|  | <span class="sd">    The output vector will order features with the selected indices first (in the order given),</span> | 
|  | <span class="sd">    followed by the selected names (in the order given).</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.6.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([</span> | 
|  | <span class="sd">    ...     (Vectors.dense([-2.0, 2.3, 0.0, 0.0, 1.0]),),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([0.0, 0.0, 0.0, 0.0, 0.0]),),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([0.6, -1.1, -3.0, 4.5, 3.3]),)], ["features"])</span> | 
|  | <span class="sd">    >>> vs = VectorSlicer(outputCol="sliced", indices=[1, 4])</span> | 
|  | <span class="sd">    >>> vs.setInputCol("features")</span> | 
|  | <span class="sd">    VectorSlicer...</span> | 
|  | <span class="sd">    >>> vs.transform(df).head().sliced</span> | 
|  | <span class="sd">    DenseVector([2.3, 1.0])</span> | 
|  | <span class="sd">    >>> vectorSlicerPath = temp_path + "/vector-slicer"</span> | 
|  | <span class="sd">    >>> vs.save(vectorSlicerPath)</span> | 
|  | <span class="sd">    >>> loadedVs = VectorSlicer.load(vectorSlicerPath)</span> | 
|  | <span class="sd">    >>> loadedVs.getIndices() == vs.getIndices()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedVs.getNames() == vs.getNames()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedVs.transform(df).take(1) == vs.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">indices</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"indices"</span><span class="p">,</span> | 
|  | <span class="s2">"An array of indices to select features from "</span> | 
|  | <span class="o">+</span> <span class="s2">"a vector column. There can be no overlap with names."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toListInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">names</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"names"</span><span class="p">,</span> | 
|  | <span class="s2">"An array of feature names to select features from "</span> | 
|  | <span class="o">+</span> <span class="s2">"a vector column. These names must be specified by ML "</span> | 
|  | <span class="o">+</span> <span class="s2">"org.apache.spark.ml.attribute.Attribute. There can be no overlap with "</span> | 
|  | <span class="o">+</span> <span class="s2">"indices."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toListString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">indices</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">names</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, inputCol=None, outputCol=None, indices=None, names=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">VectorSlicer</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.VectorSlicer"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">indices</span><span class="o">=</span><span class="p">[],</span> <span class="n">names</span><span class="o">=</span><span class="p">[])</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorSlicer.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSlicer.html#pyspark.ml.feature.VectorSlicer.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">indices</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">names</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorSlicer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, inputCol=None, outputCol=None, indices=None, names=None)</span> | 
|  | <span class="sd">        Sets params for this VectorSlicer.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorSlicer.setIndices"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSlicer.html#pyspark.ml.feature.VectorSlicer.setIndices">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setIndices</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"VectorSlicer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`indices`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">indices</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorSlicer.getIndices"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSlicer.html#pyspark.ml.feature.VectorSlicer.getIndices">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getIndices</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of indices or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">indices</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorSlicer.setNames"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSlicer.html#pyspark.ml.feature.VectorSlicer.setNames">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setNames</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"VectorSlicer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`names`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">names</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorSlicer.getNames"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSlicer.html#pyspark.ml.feature.VectorSlicer.getNames">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getNames</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of names or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">names</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorSlicer.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSlicer.html#pyspark.ml.feature.VectorSlicer.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorSlicer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VectorSlicer.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSlicer.html#pyspark.ml.feature.VectorSlicer.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorSlicer"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_Word2VecParams</span><span class="p">(</span><span class="n">HasStepSize</span><span class="p">,</span> <span class="n">HasMaxIter</span><span class="p">,</span> <span class="n">HasSeed</span><span class="p">,</span> <span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`Word2Vec` and :py:class:`Word2VecModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">vectorSize</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"vectorSize"</span><span class="p">,</span> | 
|  | <span class="s2">"the dimension of codes after transforming from words"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">numPartitions</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"numPartitions"</span><span class="p">,</span> | 
|  | <span class="s2">"number of partitions for sentences of words"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">minCount</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"minCount"</span><span class="p">,</span> | 
|  | <span class="s2">"the minimum number of times a token must appear to be included in the "</span> | 
|  | <span class="o">+</span> <span class="s2">"word2vec model's vocabulary"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">windowSize</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"windowSize"</span><span class="p">,</span> | 
|  | <span class="s2">"the window size (context words from [-window, window]). Default value is 5"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">maxSentenceLength</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"maxSentenceLength"</span><span class="p">,</span> | 
|  | <span class="s2">"Maximum length (in words) of each sentence in the input data. "</span> | 
|  | <span class="o">+</span> <span class="s2">"Any sentence longer than this threshold will "</span> | 
|  | <span class="o">+</span> <span class="s2">"be divided into chunks up to the size."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_Word2VecParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> | 
|  | <span class="n">vectorSize</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> | 
|  | <span class="n">minCount</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> | 
|  | <span class="n">numPartitions</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">stepSize</span><span class="o">=</span><span class="mf">0.025</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">windowSize</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> | 
|  | <span class="n">maxSentenceLength</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getVectorSize</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of vectorSize or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vectorSize</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getNumPartitions</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of numPartitions or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">numPartitions</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getMinCount</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of minCount or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">minCount</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getWindowSize</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of windowSize or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">windowSize</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getMaxSentenceLength</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of maxSentenceLength or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">maxSentenceLength</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="Word2Vec"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2Vec.html#pyspark.ml.feature.Word2Vec">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">Word2Vec</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"Word2VecModel"</span><span class="p">],</span> | 
|  | <span class="n">_Word2VecParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"Word2Vec"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Word2Vec trains a model of `Map(String, Vector)`, i.e. it transforms a word into a code</span> | 
|  | <span class="sd">    (vector) for further natural language processing or machine learning.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> sent = ("a b " * 100 + "a c " * 10).split(" ")</span> | 
|  | <span class="sd">    >>> doc = spark.createDataFrame([(sent,), (sent,)], ["sentence"])</span> | 
|  | <span class="sd">    >>> word2Vec = Word2Vec(vectorSize=5, seed=42, inputCol="sentence", outputCol="model")</span> | 
|  | <span class="sd">    >>> word2Vec.setMaxIter(10)</span> | 
|  | <span class="sd">    Word2Vec...</span> | 
|  | <span class="sd">    >>> word2Vec.getMaxIter()</span> | 
|  | <span class="sd">    10</span> | 
|  | <span class="sd">    >>> word2Vec.clear(word2Vec.maxIter)</span> | 
|  | <span class="sd">    >>> model = word2Vec.fit(doc)</span> | 
|  | <span class="sd">    >>> model.getMinCount()</span> | 
|  | <span class="sd">    5</span> | 
|  | <span class="sd">    >>> model.setInputCol("sentence")</span> | 
|  | <span class="sd">    Word2VecModel...</span> | 
|  | <span class="sd">    >>> model.getVectors().show()</span> | 
|  | <span class="sd">    +----+--------------------+</span> | 
|  | <span class="sd">    |word|              vector|</span> | 
|  | <span class="sd">    +----+--------------------+</span> | 
|  | <span class="sd">    |   a|[0.0951...</span> | 
|  | <span class="sd">    |   b|[-1.202...</span> | 
|  | <span class="sd">    |   c|[0.3015...</span> | 
|  | <span class="sd">    +----+--------------------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> model.findSynonymsArray("a", 2)</span> | 
|  | <span class="sd">    [('b', 0.015859...), ('c', -0.568079...)]</span> | 
|  | <span class="sd">    >>> from pyspark.sql.functions import format_number as fmt</span> | 
|  | <span class="sd">    >>> model.findSynonyms("a", 2).select("word", fmt("similarity", 5).alias("similarity")).show()</span> | 
|  | <span class="sd">    +----+----------+</span> | 
|  | <span class="sd">    |word|similarity|</span> | 
|  | <span class="sd">    +----+----------+</span> | 
|  | <span class="sd">    |   b|   0.01586|</span> | 
|  | <span class="sd">    |   c|  -0.56808|</span> | 
|  | <span class="sd">    +----+----------+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> model.transform(doc).head().model</span> | 
|  | <span class="sd">    DenseVector([-0.4833, 0.1855, -0.273, -0.0509, -0.4769])</span> | 
|  | <span class="sd">    >>> word2vecPath = temp_path + "/word2vec"</span> | 
|  | <span class="sd">    >>> word2Vec.save(word2vecPath)</span> | 
|  | <span class="sd">    >>> loadedWord2Vec = Word2Vec.load(word2vecPath)</span> | 
|  | <span class="sd">    >>> loadedWord2Vec.getVectorSize() == word2Vec.getVectorSize()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedWord2Vec.getNumPartitions() == word2Vec.getNumPartitions()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedWord2Vec.getMinCount() == word2Vec.getMinCount()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/word2vec-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = Word2VecModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.getVectors().first().word == model.getVectors().first().word</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.getVectors().first().vector == model.getVectors().first().vector</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(doc).take(1) == model.transform(doc).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">vectorSize</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span> | 
|  | <span class="n">minCount</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> | 
|  | <span class="n">numPartitions</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">stepSize</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.025</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">windowSize</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> | 
|  | <span class="n">maxSentenceLength</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1000</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, \</span> | 
|  | <span class="sd">                 maxIter=1, seed=None, inputCol=None, outputCol=None, windowSize=5, \</span> | 
|  | <span class="sd">                 maxSentenceLength=1000)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">Word2Vec</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.Word2Vec"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2Vec.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2Vec.html#pyspark.ml.feature.Word2Vec.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">vectorSize</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span> | 
|  | <span class="n">minCount</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> | 
|  | <span class="n">numPartitions</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">stepSize</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.025</span><span class="p">,</span> | 
|  | <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> | 
|  | <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">windowSize</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> | 
|  | <span class="n">maxSentenceLength</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1000</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2Vec"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, \</span> | 
|  | <span class="sd">                  maxIter=1, seed=None, inputCol=None, outputCol=None, windowSize=5, \</span> | 
|  | <span class="sd">                  maxSentenceLength=1000)</span> | 
|  | <span class="sd">        Sets params for this Word2Vec.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2Vec.setVectorSize"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2Vec.html#pyspark.ml.feature.Word2Vec.setVectorSize">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setVectorSize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2Vec"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`vectorSize`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">vectorSize</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2Vec.setNumPartitions"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2Vec.html#pyspark.ml.feature.Word2Vec.setNumPartitions">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setNumPartitions</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2Vec"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`numPartitions`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">numPartitions</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2Vec.setMinCount"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2Vec.html#pyspark.ml.feature.Word2Vec.setMinCount">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMinCount</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2Vec"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`minCount`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minCount</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2Vec.setWindowSize"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2Vec.html#pyspark.ml.feature.Word2Vec.setWindowSize">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setWindowSize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2Vec"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`windowSize`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">windowSize</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2Vec.setMaxSentenceLength"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2Vec.html#pyspark.ml.feature.Word2Vec.setMaxSentenceLength">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setMaxSentenceLength</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2Vec"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`maxSentenceLength`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxSentenceLength</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2Vec.setMaxIter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2Vec.html#pyspark.ml.feature.Word2Vec.setMaxIter">[docs]</a>    <span class="k">def</span> <span class="nf">setMaxIter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2Vec"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`maxIter`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2Vec.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2Vec.html#pyspark.ml.feature.Word2Vec.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2Vec"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2Vec.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2Vec.html#pyspark.ml.feature.Word2Vec.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2Vec"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2Vec.setSeed"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2Vec.html#pyspark.ml.feature.Word2Vec.setSeed">[docs]</a>    <span class="k">def</span> <span class="nf">setSeed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2Vec"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`seed`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2Vec.setStepSize"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2Vec.html#pyspark.ml.feature.Word2Vec.setStepSize">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setStepSize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2Vec"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`stepSize`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">stepSize</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2VecModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">Word2VecModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="Word2VecModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2VecModel.html#pyspark.ml.feature.Word2VecModel">[docs]</a><span class="k">class</span> <span class="nc">Word2VecModel</span><span class="p">(</span><span class="n">JavaModel</span><span class="p">,</span> <span class="n">_Word2VecParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"Word2VecModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`Word2Vec`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.4.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2VecModel.getVectors"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2VecModel.html#pyspark.ml.feature.Word2VecModel.getVectors">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getVectors</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Returns the vector representation of the words as a dataframe</span> | 
|  | <span class="sd">        with two fields, word and vector.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"getVectors"</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2VecModel.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2VecModel.html#pyspark.ml.feature.Word2VecModel.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2VecModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2VecModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2VecModel.html#pyspark.ml.feature.Word2VecModel.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"Word2VecModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2VecModel.findSynonyms"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2VecModel.html#pyspark.ml.feature.Word2VecModel.findSynonyms">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">findSynonyms</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">word</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Vector</span><span class="p">],</span> <span class="n">num</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Find "num" number of words closest in similarity to "word".</span> | 
|  | <span class="sd">        word can be a string or vector representation.</span> | 
|  | <span class="sd">        Returns a dataframe with two fields word and similarity (which</span> | 
|  | <span class="sd">        gives the cosine similarity).</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">word</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> | 
|  | <span class="n">word</span> <span class="o">=</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">word</span><span class="p">)</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"findSynonyms"</span><span class="p">,</span> <span class="n">word</span><span class="p">,</span> <span class="n">num</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="Word2VecModel.findSynonymsArray"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.Word2VecModel.html#pyspark.ml.feature.Word2VecModel.findSynonymsArray">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">findSynonymsArray</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">word</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Vector</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span> <span class="n">num</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Find "num" number of words closest in similarity to "word".</span> | 
|  | <span class="sd">        word can be a string or vector representation.</span> | 
|  | <span class="sd">        Returns an array with two fields word and similarity (which</span> | 
|  | <span class="sd">        gives the cosine similarity).</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">word</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span> | 
|  | <span class="n">word</span> <span class="o">=</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">word</span><span class="p">)</span> | 
|  | <span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> | 
|  | <span class="n">tuples</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span><span class="o">.</span><span class="n">findSynonymsArray</span><span class="p">(</span><span class="n">word</span><span class="p">,</span> <span class="n">num</span><span class="p">)</span> | 
|  | <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">st</span><span class="p">:</span> <span class="p">(</span><span class="n">st</span><span class="o">.</span><span class="n">_1</span><span class="p">(),</span> <span class="n">st</span><span class="o">.</span><span class="n">_2</span><span class="p">()),</span> <span class="nb">list</span><span class="p">(</span><span class="n">tuples</span><span class="p">)))</span></div></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_PCAParams</span><span class="p">(</span><span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`PCA` and :py:class:`PCAModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">k</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"k"</span><span class="p">,</span> | 
|  | <span class="s2">"the number of principal components"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getK</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of k or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">k</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="PCA"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PCA.html#pyspark.ml.feature.PCA">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">PCA</span><span class="p">(</span><span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"PCAModel"</span><span class="p">],</span> <span class="n">_PCAParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"PCA"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    PCA trains a model to project vectors to a lower dimensional space of the</span> | 
|  | <span class="sd">    top :py:attr:`k` principal components.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.5.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> data = [(Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([2.0, 0.0, 3.0, 4.0, 5.0]),),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0]),)]</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(data,["features"])</span> | 
|  | <span class="sd">    >>> pca = PCA(k=2, inputCol="features")</span> | 
|  | <span class="sd">    >>> pca.setOutputCol("pca_features")</span> | 
|  | <span class="sd">    PCA...</span> | 
|  | <span class="sd">    >>> model = pca.fit(df)</span> | 
|  | <span class="sd">    >>> model.getK()</span> | 
|  | <span class="sd">    2</span> | 
|  | <span class="sd">    >>> model.setOutputCol("output")</span> | 
|  | <span class="sd">    PCAModel...</span> | 
|  | <span class="sd">    >>> model.transform(df).collect()[0].output</span> | 
|  | <span class="sd">    DenseVector([1.648..., -4.013...])</span> | 
|  | <span class="sd">    >>> model.explainedVariance</span> | 
|  | <span class="sd">    DenseVector([0.794..., 0.205...])</span> | 
|  | <span class="sd">    >>> pcaPath = temp_path + "/pca"</span> | 
|  | <span class="sd">    >>> pca.save(pcaPath)</span> | 
|  | <span class="sd">    >>> loadedPca = PCA.load(pcaPath)</span> | 
|  | <span class="sd">    >>> loadedPca.getK() == pca.getK()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/pca-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = PCAModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.pc == model.pc</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.explainedVariance == model.explainedVariance</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).take(1) == model.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">k</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, k=None, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">PCA</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.PCA"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="PCA.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PCA.html#pyspark.ml.feature.PCA.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">k</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"PCA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, k=None, inputCol=None, outputCol=None)</span> | 
|  | <span class="sd">        Set params for this PCA.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PCA.setK"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PCA.html#pyspark.ml.feature.PCA.setK">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setK</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PCA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`k`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">k</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PCA.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PCA.html#pyspark.ml.feature.PCA.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PCA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PCA.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PCA.html#pyspark.ml.feature.PCA.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PCA"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PCAModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">PCAModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="PCAModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PCAModel.html#pyspark.ml.feature.PCAModel">[docs]</a><span class="k">class</span> <span class="nc">PCAModel</span><span class="p">(</span><span class="n">JavaModel</span><span class="p">,</span> <span class="n">_PCAParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"PCAModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`PCA`. Transforms vectors to a lower dimensional space.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.5.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="PCAModel.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PCAModel.html#pyspark.ml.feature.PCAModel.setInputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PCAModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="PCAModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.PCAModel.html#pyspark.ml.feature.PCAModel.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"PCAModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">pc</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">DenseMatrix</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Returns a principal components Matrix.</span> | 
|  | <span class="sd">        Each column is one principal component.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"pc"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">explainedVariance</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">DenseVector</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Returns a vector of proportions of variance</span> | 
|  | <span class="sd">        explained by each principal component.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"explainedVariance"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_RFormulaParams</span><span class="p">(</span><span class="n">HasFeaturesCol</span><span class="p">,</span> <span class="n">HasLabelCol</span><span class="p">,</span> <span class="n">HasHandleInvalid</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`RFormula` and :py:class:`RFormulaModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.0.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">formula</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> <span class="s2">"formula"</span><span class="p">,</span> <span class="s2">"R model formula"</span><span class="p">,</span> <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">forceIndexLabel</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"forceIndexLabel"</span><span class="p">,</span> | 
|  | <span class="s2">"Force to index label whether it is numeric or string"</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">stringIndexerOrderType</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"stringIndexerOrderType"</span><span class="p">,</span> | 
|  | <span class="s2">"How to order categories of a string feature column used by "</span> | 
|  | <span class="o">+</span> <span class="s2">"StringIndexer. The last category after ordering is dropped "</span> | 
|  | <span class="o">+</span> <span class="s2">"when encoding strings. Supported options: frequencyDesc, "</span> | 
|  | <span class="o">+</span> <span class="s2">"frequencyAsc, alphabetDesc, alphabetAsc. The default value "</span> | 
|  | <span class="o">+</span> <span class="s2">"is frequencyDesc. When the ordering is set to alphabetDesc, "</span> | 
|  | <span class="o">+</span> <span class="s2">"RFormula drops the same category as R when encoding strings."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"handleInvalid"</span><span class="p">,</span> | 
|  | <span class="s2">"how to handle invalid entries. "</span> | 
|  | <span class="o">+</span> <span class="s2">"Options are 'skip' (filter out rows with invalid values), "</span> | 
|  | <span class="o">+</span> <span class="s2">"'error' (throw an error), or 'keep' (put invalid data in a special "</span> | 
|  | <span class="o">+</span> <span class="s2">"additional bucket, at index numLabels)."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_RFormulaParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> | 
|  | <span class="n">forceIndexLabel</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">stringIndexerOrderType</span><span class="o">=</span><span class="s2">"frequencyDesc"</span><span class="p">,</span> <span class="n">handleInvalid</span><span class="o">=</span><span class="s2">"error"</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getFormula</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`formula`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">formula</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getForceIndexLabel</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`forceIndexLabel`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">forceIndexLabel</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getStringIndexerOrderType</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of :py:attr:`stringIndexerOrderType` or its default value 'frequencyDesc'.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">stringIndexerOrderType</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="RFormula"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RFormula.html#pyspark.ml.feature.RFormula">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">RFormula</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"RFormulaModel"</span><span class="p">],</span> | 
|  | <span class="n">_RFormulaParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"RFormula"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Implements the transforms required for fitting a dataset against an</span> | 
|  | <span class="sd">    R model formula. Currently we support a limited subset of the R</span> | 
|  | <span class="sd">    operators, including '~', '.', ':', '+', '-', '*', and '^'.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.5.0</span> | 
|  |  | 
|  | <span class="sd">    Notes</span> | 
|  | <span class="sd">    -----</span> | 
|  | <span class="sd">    Also see the `R formula docs</span> | 
|  | <span class="sd">    <https://stat.ethz.ch/R-manual/R-patched/library/stats/html/formula.html>`_.</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame([</span> | 
|  | <span class="sd">    ...     (1.0, 1.0, "a"),</span> | 
|  | <span class="sd">    ...     (0.0, 2.0, "b"),</span> | 
|  | <span class="sd">    ...     (0.0, 0.0, "a")</span> | 
|  | <span class="sd">    ... ], ["y", "x", "s"])</span> | 
|  | <span class="sd">    >>> rf = RFormula(formula="y ~ x + s")</span> | 
|  | <span class="sd">    >>> model = rf.fit(df)</span> | 
|  | <span class="sd">    >>> model.getLabelCol()</span> | 
|  | <span class="sd">    'label'</span> | 
|  | <span class="sd">    >>> model.transform(df).show()</span> | 
|  | <span class="sd">    +---+---+---+---------+-----+</span> | 
|  | <span class="sd">    |  y|  x|  s| features|label|</span> | 
|  | <span class="sd">    +---+---+---+---------+-----+</span> | 
|  | <span class="sd">    |1.0|1.0|  a|[1.0,1.0]|  1.0|</span> | 
|  | <span class="sd">    |0.0|2.0|  b|[2.0,0.0]|  0.0|</span> | 
|  | <span class="sd">    |0.0|0.0|  a|[0.0,1.0]|  0.0|</span> | 
|  | <span class="sd">    +---+---+---+---------+-----+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> rf.fit(df, {rf.formula: "y ~ . - s"}).transform(df).show()</span> | 
|  | <span class="sd">    +---+---+---+--------+-----+</span> | 
|  | <span class="sd">    |  y|  x|  s|features|label|</span> | 
|  | <span class="sd">    +---+---+---+--------+-----+</span> | 
|  | <span class="sd">    |1.0|1.0|  a|   [1.0]|  1.0|</span> | 
|  | <span class="sd">    |0.0|2.0|  b|   [2.0]|  0.0|</span> | 
|  | <span class="sd">    |0.0|0.0|  a|   [0.0]|  0.0|</span> | 
|  | <span class="sd">    +---+---+---+--------+-----+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> rFormulaPath = temp_path + "/rFormula"</span> | 
|  | <span class="sd">    >>> rf.save(rFormulaPath)</span> | 
|  | <span class="sd">    >>> loadedRF = RFormula.load(rFormulaPath)</span> | 
|  | <span class="sd">    >>> loadedRF.getFormula() == rf.getFormula()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedRF.getFeaturesCol() == rf.getFeaturesCol()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedRF.getLabelCol() == rf.getLabelCol()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedRF.getHandleInvalid() == rf.getHandleInvalid()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> str(loadedRF)</span> | 
|  | <span class="sd">    'RFormula(y ~ x + s) (uid=...)'</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/rFormulaModel"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = RFormulaModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.uid == model.uid</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).show()</span> | 
|  | <span class="sd">    +---+---+---+---------+-----+</span> | 
|  | <span class="sd">    |  y|  x|  s| features|label|</span> | 
|  | <span class="sd">    +---+---+---+---------+-----+</span> | 
|  | <span class="sd">    |1.0|1.0|  a|[1.0,1.0]|  1.0|</span> | 
|  | <span class="sd">    |0.0|2.0|  b|[2.0,0.0]|  0.0|</span> | 
|  | <span class="sd">    |0.0|0.0|  a|[0.0,1.0]|  0.0|</span> | 
|  | <span class="sd">    +---+---+---+---------+-----+</span> | 
|  | <span class="sd">    ...</span> | 
|  | <span class="sd">    >>> str(loadedModel)</span> | 
|  | <span class="sd">    'RFormulaModel(ResolvedRFormula(label=y, terms=[x,s], hasIntercept=true)) (uid=...)'</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">formula</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> | 
|  | <span class="n">forceIndexLabel</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">stringIndexerOrderType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"frequencyDesc"</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, formula=None, featuresCol="features", labelCol="label", \</span> | 
|  | <span class="sd">                 forceIndexLabel=False, stringIndexerOrderType="frequencyDesc", \</span> | 
|  | <span class="sd">                 handleInvalid="error")</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">RFormula</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.RFormula"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="RFormula.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RFormula.html#pyspark.ml.feature.RFormula.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">formula</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> | 
|  | <span class="n">forceIndexLabel</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> | 
|  | <span class="n">stringIndexerOrderType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"frequencyDesc"</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"RFormula"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, formula=None, featuresCol="features", labelCol="label", \</span> | 
|  | <span class="sd">                  forceIndexLabel=False, stringIndexerOrderType="frequencyDesc", \</span> | 
|  | <span class="sd">                  handleInvalid="error")</span> | 
|  | <span class="sd">        Sets params for RFormula.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RFormula.setFormula"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RFormula.html#pyspark.ml.feature.RFormula.setFormula">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.5.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFormula</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RFormula"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`formula`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">formula</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RFormula.setForceIndexLabel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RFormula.html#pyspark.ml.feature.RFormula.setForceIndexLabel">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setForceIndexLabel</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RFormula"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`forceIndexLabel`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">forceIndexLabel</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RFormula.setStringIndexerOrderType"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RFormula.html#pyspark.ml.feature.RFormula.setStringIndexerOrderType">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setStringIndexerOrderType</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RFormula"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`stringIndexerOrderType`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">stringIndexerOrderType</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RFormula.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RFormula.html#pyspark.ml.feature.RFormula.setFeaturesCol">[docs]</a>    <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RFormula"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RFormula.setLabelCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RFormula.html#pyspark.ml.feature.RFormula.setLabelCol">[docs]</a>    <span class="k">def</span> <span class="nf">setLabelCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RFormula"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`labelCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">labelCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="RFormula.setHandleInvalid"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RFormula.html#pyspark.ml.feature.RFormula.setHandleInvalid">[docs]</a>    <span class="k">def</span> <span class="nf">setHandleInvalid</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RFormula"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`handleInvalid`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RFormulaModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">RFormulaModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="n">formulaStr</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">getFormula</span><span class="p">()</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">isDefined</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">formula</span><span class="p">)</span> <span class="k">else</span> <span class="s2">""</span> | 
|  | <span class="k">return</span> <span class="s2">"RFormula(</span><span class="si">%s</span><span class="s2">) (uid=</span><span class="si">%s</span><span class="s2">)"</span> <span class="o">%</span> <span class="p">(</span><span class="n">formulaStr</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="RFormulaModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.RFormulaModel.html#pyspark.ml.feature.RFormulaModel">[docs]</a><span class="k">class</span> <span class="nc">RFormulaModel</span><span class="p">(</span><span class="n">JavaModel</span><span class="p">,</span> <span class="n">_RFormulaParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"RFormulaModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`RFormula`. Fitting is required to determine the</span> | 
|  | <span class="sd">    factor levels of formula terms.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 1.5.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="n">resolvedFormula</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"resolvedFormula"</span><span class="p">)</span> | 
|  | <span class="k">return</span> <span class="s2">"RFormulaModel(</span><span class="si">%s</span><span class="s2">) (uid=</span><span class="si">%s</span><span class="s2">)"</span> <span class="o">%</span> <span class="p">(</span><span class="n">resolvedFormula</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_SelectorParams</span><span class="p">(</span><span class="n">HasFeaturesCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">,</span> <span class="n">HasLabelCol</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`Selector` and :py:class:`SelectorModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.1.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">selectorType</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"selectorType"</span><span class="p">,</span> | 
|  | <span class="s2">"The selector type. "</span> | 
|  | <span class="o">+</span> <span class="s2">"Supported options: numTopFeatures (default), percentile, fpr, fdr, fwe."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">numTopFeatures</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"numTopFeatures"</span><span class="p">,</span> | 
|  | <span class="s2">"Number of features that selector will select, ordered by ascending p-value. "</span> | 
|  | <span class="o">+</span> <span class="s2">"If the number of features is &lt; numTopFeatures, then this will select "</span> | 
|  | <span class="o">+</span> <span class="s2">"all features."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">percentile</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"percentile"</span><span class="p">,</span> | 
|  | <span class="s2">"Percentile of features that selector "</span> <span class="o">+</span> <span class="s2">"will select, ordered by ascending p-value."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">fpr</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"fpr"</span><span class="p">,</span> | 
|  | <span class="s2">"The highest p-value for features to be kept."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">fdr</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"fdr"</span><span class="p">,</span> | 
|  | <span class="s2">"The upper bound of the expected false discovery rate."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">fwe</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"fwe"</span><span class="p">,</span> | 
|  | <span class="s2">"The upper bound of the expected family-wise error rate."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_SelectorParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> | 
|  | <span class="n">numTopFeatures</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span> | 
|  | <span class="n">selectorType</span><span class="o">=</span><span class="s2">"numTopFeatures"</span><span class="p">,</span> | 
|  | <span class="n">percentile</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> | 
|  | <span class="n">fpr</span><span class="o">=</span><span class="mf">0.05</span><span class="p">,</span> | 
|  | <span class="n">fdr</span><span class="o">=</span><span class="mf">0.05</span><span class="p">,</span> | 
|  | <span class="n">fwe</span><span class="o">=</span><span class="mf">0.05</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getSelectorType</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of selectorType or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">selectorType</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getNumTopFeatures</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of numTopFeatures or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">numTopFeatures</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getPercentile</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of percentile or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">percentile</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getFpr</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of fpr or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fpr</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getFdr</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of fdr or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fdr</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getFwe</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of fwe or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fwe</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_Selector</span><span class="p">(</span><span class="n">JavaEstimator</span><span class="p">[</span><span class="n">JM</span><span class="p">],</span> <span class="n">_SelectorParams</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">,</span> <span class="n">JavaMLWritable</span><span class="p">,</span> <span class="n">Generic</span><span class="p">[</span><span class="n">JM</span><span class="p">]):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Mixin for Selectors.</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSelectorType</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`selectorType`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">selectorType</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setNumTopFeatures</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`numTopFeatures`.</span> | 
|  | <span class="sd">        Only applicable when selectorType = "numTopFeatures".</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">numTopFeatures</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setPercentile</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`percentile`.</span> | 
|  | <span class="sd">        Only applicable when selectorType = "percentile".</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">percentile</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFpr</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`fpr`.</span> | 
|  | <span class="sd">        Only applicable when selectorType = "fpr".</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">fpr</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFdr</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`fdr`.</span> | 
|  | <span class="sd">        Only applicable when selectorType = "fdr".</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">fdr</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFwe</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`fwe`.</span> | 
|  | <span class="sd">        Only applicable when selectorType = "fwe".</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">fwe</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">setLabelCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`labelCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">labelCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_SelectorModel</span><span class="p">(</span><span class="n">JavaModel</span><span class="p">,</span> <span class="n">_SelectorParams</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Mixin for Selector models.</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">P</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">selectedFeatures</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        List of indices to select (filter).</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"selectedFeatures"</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="ChiSqSelector"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ChiSqSelector.html#pyspark.ml.feature.ChiSqSelector">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">ChiSqSelector</span><span class="p">(</span> | 
|  | <span class="n">_Selector</span><span class="p">[</span><span class="s2">"ChiSqSelectorModel"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"ChiSqSelector"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Chi-Squared feature selection, which selects categorical features to use for predicting a</span> | 
|  | <span class="sd">    categorical label.</span> | 
|  | <span class="sd">    The selector supports different selection methods: `numTopFeatures`, `percentile`, `fpr`,</span> | 
|  | <span class="sd">    `fdr`, `fwe`.</span> | 
|  |  | 
|  | <span class="sd">     * `numTopFeatures` chooses a fixed number of top features according to a chi-squared test.</span> | 
|  |  | 
|  | <span class="sd">     * `percentile` is similar but chooses a fraction of all features</span> | 
|  | <span class="sd">       instead of a fixed number.</span> | 
|  |  | 
|  | <span class="sd">     * `fpr` chooses all features whose p-values are below a threshold,</span> | 
|  | <span class="sd">       thus controlling the false positive rate of selection.</span> | 
|  |  | 
|  | <span class="sd">     * `fdr` uses the `Benjamini-Hochberg procedure &lt;https://en.wikipedia.org/wiki/</span> | 
|  | <span class="sd">       False_discovery_rate#Benjamini.E2.80.93Hochberg_procedure&gt;`_</span> | 
|  | <span class="sd">       to choose all features whose false discovery rate is below a threshold.</span> | 
|  |  | 
|  | <span class="sd">     * `fwe` chooses all features whose p-values are below a threshold. The threshold is scaled by</span> | 
|  | <span class="sd">       1/numFeatures, thus controlling the family-wise error rate of selection.</span> | 
|  |  | 
|  | <span class="sd">    By default, the selection method is `numTopFeatures`, with the default number of top features</span> | 
|  | <span class="sd">    set to 50.</span> | 
|  |  | 
|  | <span class="sd">    .. deprecated:: 3.1.0</span> | 
|  | <span class="sd">        Use UnivariateFeatureSelector</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.0.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(</span> | 
|  | <span class="sd">    ...    [(Vectors.dense([0.0, 0.0, 18.0, 1.0]), 1.0),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([0.0, 1.0, 12.0, 0.0]), 0.0),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([1.0, 0.0, 15.0, 0.1]), 0.0)],</span> | 
|  | <span class="sd">    ...    ["features", "label"])</span> | 
|  | <span class="sd">    >>> selector = ChiSqSelector(numTopFeatures=1, outputCol="selectedFeatures")</span> | 
|  | <span class="sd">    >>> model = selector.fit(df)</span> | 
|  | <span class="sd">    >>> model.getFeaturesCol()</span> | 
|  | <span class="sd">    'features'</span> | 
|  | <span class="sd">    >>> model.setFeaturesCol("features")</span> | 
|  | <span class="sd">    ChiSqSelectorModel...</span> | 
|  | <span class="sd">    >>> model.transform(df).head().selectedFeatures</span> | 
|  | <span class="sd">    DenseVector([18.0])</span> | 
|  | <span class="sd">    >>> model.selectedFeatures</span> | 
|  | <span class="sd">    [2]</span> | 
|  | <span class="sd">    >>> chiSqSelectorPath = temp_path + "/chi-sq-selector"</span> | 
|  | <span class="sd">    >>> selector.save(chiSqSelectorPath)</span> | 
|  | <span class="sd">    >>> loadedSelector = ChiSqSelector.load(chiSqSelectorPath)</span> | 
|  | <span class="sd">    >>> loadedSelector.getNumTopFeatures() == selector.getNumTopFeatures()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/chi-sq-selector-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = ChiSqSelectorModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.selectedFeatures == model.selectedFeatures</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).take(1) == model.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">numTopFeatures</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">50</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> | 
|  | <span class="n">selectorType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"numTopFeatures"</span><span class="p">,</span> | 
|  | <span class="n">percentile</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.1</span><span class="p">,</span> | 
|  | <span class="n">fpr</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.05</span><span class="p">,</span> | 
|  | <span class="n">fdr</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.05</span><span class="p">,</span> | 
|  | <span class="n">fwe</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.05</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, numTopFeatures=50, featuresCol="features", outputCol=None, \</span> | 
|  | <span class="sd">                 labelCol="label", selectorType="numTopFeatures", percentile=0.1, fpr=0.05, \</span> | 
|  | <span class="sd">                 fdr=0.05, fwe=0.05)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">ChiSqSelector</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.ChiSqSelector"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <!-- Source listing for ChiSqSelector.setParams: a keyword-only method that reads the captured keyword arguments from self._input_kwargs and returns the result of self._set called with them. The [docs] anchor links back to this method's entry on the ChiSqSelector API reference page. --><div class="viewcode-block" id="ChiSqSelector.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ChiSqSelector.html#pyspark.ml.feature.ChiSqSelector.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">numTopFeatures</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">50</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> | 
|  | <span class="n">selectorType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"numTopFeatures"</span><span class="p">,</span> | 
|  | <span class="n">percentile</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.1</span><span class="p">,</span> | 
|  | <span class="n">fpr</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.05</span><span class="p">,</span> | 
|  | <span class="n">fdr</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.05</span><span class="p">,</span> | 
|  | <span class="n">fwe</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.05</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"ChiSqSelector"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, numTopFeatures=50, featuresCol="features", outputCol=None, \</span> | 
|  | <span class="sd">                  labelCol="label", selectorType="numTopFeatures", percentile=0.1, fpr=0.05, \</span> | 
|  | <span class="sd">                  fdr=0.05, fwe=0.05)</span> | 
|  | <span class="sd">        Sets params for this ChiSqSelector.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"ChiSqSelectorModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">ChiSqSelectorModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <!-- Source listing for class ChiSqSelectorModel (bases: _SelectorModel, JavaMLReadable, JavaMLWritable). The class body shown here consists only of its docstring. The [docs] anchor links back to the class entry in the API reference. --><div class="viewcode-block" id="ChiSqSelectorModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.ChiSqSelectorModel.html#pyspark.ml.feature.ChiSqSelectorModel">[docs]</a><span class="k">class</span> <span class="nc">ChiSqSelectorModel</span><span class="p">(</span><span class="n">_SelectorModel</span><span class="p">,</span> <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"ChiSqSelectorModel"</span><span class="p">],</span> <span class="n">JavaMLWritable</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`ChiSqSelector`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.0.0</span> | 
|  | <span class="sd">    """</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="VectorSizeHint"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSizeHint.html#pyspark.ml.feature.VectorSizeHint">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">VectorSizeHint</span><span class="p">(</span> | 
|  | <span class="n">JavaTransformer</span><span class="p">,</span> | 
|  | <span class="n">HasInputCol</span><span class="p">,</span> | 
|  | <span class="n">HasHandleInvalid</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"VectorSizeHint"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    A feature transformer that adds size information to the metadata of a vector column.</span> | 
|  | <span class="sd">    VectorAssembler needs size information for its input columns and cannot be used on streaming</span> | 
|  | <span class="sd">    dataframes without this metadata.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 2.3.0</span> | 
|  |  | 
|  | <span class="sd">    Notes</span> | 
|  | <span class="sd">    -----</span> | 
|  | <span class="sd">    VectorSizeHint modifies `inputCol` to include size metadata and does not have an outputCol.</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> from pyspark.ml import Pipeline, PipelineModel</span> | 
|  | <span class="sd">    >>> data = [(Vectors.dense([1., 2., 3.]), 4.)]</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(data, ["vector", "float"])</span> | 
|  | <span class="sd">    >>></span> | 
|  | <span class="sd">    >>> sizeHint = VectorSizeHint(inputCol="vector", size=3, handleInvalid="skip")</span> | 
|  | <span class="sd">    >>> vecAssembler = VectorAssembler(inputCols=["vector", "float"], outputCol="assembled")</span> | 
|  | <span class="sd">    >>> pipeline = Pipeline(stages=[sizeHint, vecAssembler])</span> | 
|  | <span class="sd">    >>></span> | 
|  | <span class="sd">    >>> pipelineModel = pipeline.fit(df)</span> | 
|  | <span class="sd">    >>> pipelineModel.transform(df).head().assembled</span> | 
|  | <span class="sd">    DenseVector([1.0, 2.0, 3.0, 4.0])</span> | 
|  | <span class="sd">    >>> vectorSizeHintPath = temp_path + "/vector-size-hint-pipeline"</span> | 
|  | <span class="sd">    >>> pipelineModel.save(vectorSizeHintPath)</span> | 
|  | <span class="sd">    >>> loadedPipeline = PipelineModel.load(vectorSizeHintPath)</span> | 
|  | <span class="sd">    >>> loaded = loadedPipeline.transform(df).head().assembled</span> | 
|  | <span class="sd">    >>> expected = pipelineModel.transform(df).head().assembled</span> | 
|  | <span class="sd">    >>> loaded == expected</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="n">size</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> <span class="s2">"size"</span><span class="p">,</span> <span class="s2">"Size of vectors in column."</span><span class="p">,</span> <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"handleInvalid"</span><span class="p">,</span> | 
|  | <span class="s2">"How to handle invalid vectors in inputCol. Invalid vectors include "</span> | 
|  | <span class="s2">"nulls and vectors with the wrong size. The options are `skip` (filter "</span> | 
|  | <span class="s2">"out rows with invalid vectors), `error` (throw an error) and "</span> | 
|  | <span class="s2">"`optimistic` (do not check the vector size, and keep all rows). "</span> | 
|  | <span class="s2">"`error` by default."</span><span class="p">,</span> | 
|  | <span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">size</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, inputCol=None, size=None, handleInvalid="error")</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">VectorSizeHint</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.feature.VectorSizeHint"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="s2">"error"</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <!-- Source listing for VectorSizeHint.setParams: a keyword-only method that reads the captured keyword arguments from self._input_kwargs and returns the result of self._set called with them. The size parameter is annotated Optional[int], in line with the Param[int] declaration of size, the __init__ signature and setSize/getSize in this class. The [docs] anchor links back to this method's entry in the API reference. --><div class="viewcode-block" id="VectorSizeHint.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSizeHint.html#pyspark.ml.feature.VectorSizeHint.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">size</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">handleInvalid</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"error"</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorSizeHint"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, inputCol=None, size=None, handleInvalid="error")</span> | 
|  | <span class="sd">        Sets params for this VectorSizeHint.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Source listing for VectorSizeHint.getSize: returns self.getOrDefault(self.size). The [docs] anchor links back to this method's entry in the API reference. --><div class="viewcode-block" id="VectorSizeHint.getSize"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSizeHint.html#pyspark.ml.feature.VectorSizeHint.getSize">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getSize</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""Gets size param, the size of vectors in `inputCol`."""</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">size</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Source listing for VectorSizeHint.setSize: returns self._set(size=value). The [docs] anchor links back to this method's entry in the API reference. --><div class="viewcode-block" id="VectorSizeHint.setSize"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSizeHint.html#pyspark.ml.feature.VectorSizeHint.setSize">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorSizeHint"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""Sets size param, the size of vectors in `inputCol`."""</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">size</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Source listing for VectorSizeHint.setInputCol: returns self._set(inputCol=value). The [docs] anchor links back to this method's entry in the API reference. --><div class="viewcode-block" id="VectorSizeHint.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSizeHint.html#pyspark.ml.feature.VectorSizeHint.setInputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorSizeHint"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`inputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Source listing for VectorSizeHint.setHandleInvalid: returns self._set(handleInvalid=value). The second closing div tag on the last row of this listing ends the enclosing VectorSizeHint class listing. The [docs] anchor links back to this method's entry in the API reference. --><div class="viewcode-block" id="VectorSizeHint.setHandleInvalid"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VectorSizeHint.html#pyspark.ml.feature.VectorSizeHint.setHandleInvalid">[docs]</a>    <span class="k">def</span> <span class="nf">setHandleInvalid</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VectorSizeHint"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`handleInvalid`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">handleInvalid</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_VarianceThresholdSelectorParams</span><span class="p">(</span><span class="n">HasFeaturesCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`VarianceThresholdSelector` and</span> | 
|  | <span class="sd">    :py:class:`VarianceThresholdSelectorModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.1.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">varianceThreshold</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"varianceThreshold"</span><span class="p">,</span> | 
|  | <span class="s2">"Param for variance threshold. Features with a variance not "</span> | 
|  | <span class="o">+</span> <span class="s2">"greater than this threshold will be removed. The default value "</span> | 
|  | <span class="o">+</span> <span class="s2">"is 0.0."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getVarianceThreshold</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of varianceThreshold or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">varianceThreshold</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="VarianceThresholdSelector"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VarianceThresholdSelector.html#pyspark.ml.feature.VarianceThresholdSelector">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">VarianceThresholdSelector</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"VarianceThresholdSelectorModel"</span><span class="p">],</span> | 
|  | <span class="n">_VarianceThresholdSelectorParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"VarianceThresholdSelector"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Feature selector that removes all low-variance features. Features with a</span> | 
|  | <span class="sd">    (sample) variance not greater than the threshold will be removed. The default is to keep</span> | 
|  | <span class="sd">    all features with non-zero variance, i.e. remove the features that have the</span> | 
|  | <span class="sd">    same value in all samples.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.1.0</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(</span> | 
|  | <span class="sd">    ...    [(Vectors.dense([6.0, 7.0, 0.0, 7.0, 6.0, 0.0]),),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([0.0, 9.0, 6.0, 0.0, 5.0, 9.0]),),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([0.0, 9.0, 3.0, 0.0, 5.0, 5.0]),),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([0.0, 9.0, 8.0, 5.0, 6.0, 4.0]),),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([8.0, 9.0, 6.0, 5.0, 4.0, 4.0]),),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([8.0, 9.0, 6.0, 0.0, 0.0, 0.0]),)],</span> | 
|  | <span class="sd">    ...    ["features"])</span> | 
|  | <span class="sd">    >>> selector = VarianceThresholdSelector(varianceThreshold=8.2, outputCol="selectedFeatures")</span> | 
|  | <span class="sd">    >>> model = selector.fit(df)</span> | 
|  | <span class="sd">    >>> model.getFeaturesCol()</span> | 
|  | <span class="sd">    'features'</span> | 
|  | <span class="sd">    >>> model.setFeaturesCol("features")</span> | 
|  | <span class="sd">    VarianceThresholdSelectorModel...</span> | 
|  | <span class="sd">    >>> model.transform(df).head().selectedFeatures</span> | 
|  | <span class="sd">    DenseVector([6.0, 7.0, 0.0])</span> | 
|  | <span class="sd">    >>> model.selectedFeatures</span> | 
|  | <span class="sd">    [0, 3, 5]</span> | 
|  | <span class="sd">    >>> varianceThresholdSelectorPath = temp_path + "/variance-threshold-selector"</span> | 
|  | <span class="sd">    >>> selector.save(varianceThresholdSelectorPath)</span> | 
|  | <span class="sd">    >>> loadedSelector = VarianceThresholdSelector.load(varianceThresholdSelectorPath)</span> | 
|  | <span class="sd">    >>> loadedSelector.getVarianceThreshold() == selector.getVarianceThreshold()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/variance-threshold-selector-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = VarianceThresholdSelectorModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.selectedFeatures == model.selectedFeatures</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).take(1) == model.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">varianceThreshold</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, featuresCol="features", outputCol=None, varianceThreshold=0.0)</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">VarianceThresholdSelector</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> | 
|  | <span class="s2">"org.apache.spark.ml.feature.VarianceThresholdSelector"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> | 
|  | <span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">varianceThreshold</span><span class="o">=</span><span class="mf">0.0</span><span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <!-- Source listing for VarianceThresholdSelector.setParams: a keyword-only method that reads the captured keyword arguments from self._input_kwargs and returns the result of self._set called with them. The [docs] anchor links back to this method's entry in the API reference. --><div class="viewcode-block" id="VarianceThresholdSelector.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VarianceThresholdSelector.html#pyspark.ml.feature.VarianceThresholdSelector.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">varianceThreshold</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"VarianceThresholdSelector"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, featuresCol="features", outputCol=None, varianceThreshold=0.0)</span> | 
|  | <span class="sd">        Sets params for this VarianceThresholdSelector.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <!-- Source listing for VarianceThresholdSelector.setVarianceThreshold: returns self._set(varianceThreshold=value). The [docs] anchor links back to this method's entry in the API reference. --><div class="viewcode-block" id="VarianceThresholdSelector.setVarianceThreshold"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VarianceThresholdSelector.html#pyspark.ml.feature.VarianceThresholdSelector.setVarianceThreshold">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setVarianceThreshold</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VarianceThresholdSelector"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`varianceThreshold`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">varianceThreshold</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VarianceThresholdSelector.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VarianceThresholdSelector.html#pyspark.ml.feature.VarianceThresholdSelector.setFeaturesCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VarianceThresholdSelector"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VarianceThresholdSelector.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VarianceThresholdSelector.html#pyspark.ml.feature.VarianceThresholdSelector.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VarianceThresholdSelector"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VarianceThresholdSelectorModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">VarianceThresholdSelectorModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="VarianceThresholdSelectorModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VarianceThresholdSelectorModel.html#pyspark.ml.feature.VarianceThresholdSelectorModel">[docs]</a><span class="k">class</span> <span class="nc">VarianceThresholdSelectorModel</span><span class="p">(</span> | 
|  | <span class="n">JavaModel</span><span class="p">,</span> | 
|  | <span class="n">_VarianceThresholdSelectorParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"VarianceThresholdSelectorModel"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`VarianceThresholdSelector`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.1.0</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="VarianceThresholdSelectorModel.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VarianceThresholdSelectorModel.html#pyspark.ml.feature.VarianceThresholdSelectorModel.setFeaturesCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VarianceThresholdSelectorModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="VarianceThresholdSelectorModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.VarianceThresholdSelectorModel.html#pyspark.ml.feature.VarianceThresholdSelectorModel.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"VarianceThresholdSelectorModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.0"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">selectedFeatures</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        List of indices to select (filter).</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"selectedFeatures"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <span class="k">class</span> <span class="nc">_UnivariateFeatureSelectorParams</span><span class="p">(</span><span class="n">HasFeaturesCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">,</span> <span class="n">HasLabelCol</span><span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Params for :py:class:`UnivariateFeatureSelector` and</span> | 
|  | <span class="sd">    :py:class:`UnivariateFeatureSelectorModel`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.1.1</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">featureType</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"featureType"</span><span class="p">,</span> | 
|  | <span class="s2">"The feature type. "</span> <span class="o">+</span> <span class="s2">"Supported options: categorical, continuous."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">labelType</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"labelType"</span><span class="p">,</span> | 
|  | <span class="s2">"The label type. "</span> <span class="o">+</span> <span class="s2">"Supported options: categorical, continuous."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">selectionMode</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"selectionMode"</span><span class="p">,</span> | 
|  | <span class="s2">"The selection mode. "</span> | 
|  | <span class="o">+</span> <span class="s2">"Supported options: numTopFeatures (default), percentile, fpr, "</span> | 
|  | <span class="o">+</span> <span class="s2">"fdr, fwe."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="n">selectionThreshold</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> | 
|  | <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> | 
|  | <span class="s2">"selectionThreshold"</span><span class="p">,</span> | 
|  | <span class="s2">"The upper bound of the "</span> <span class="o">+</span> <span class="s2">"features that selector will select."</span><span class="p">,</span> | 
|  | <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  |  | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">_UnivariateFeatureSelectorParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">selectionMode</span><span class="o">=</span><span class="s2">"numTopFeatures"</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.1"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getFeatureType</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of featureType or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">featureType</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.1"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getLabelType</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of labelType or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">labelType</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.1"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getSelectionMode</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of selectionMode or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">selectionMode</span><span class="p">)</span> | 
|  |  | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.1"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">getSelectionThreshold</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Gets the value of selectionThreshold or its default value.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">selectionThreshold</span><span class="p">)</span> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="UnivariateFeatureSelector"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.UnivariateFeatureSelector.html#pyspark.ml.feature.UnivariateFeatureSelector">[docs]</a><span class="nd">@inherit_doc</span> | 
|  | <span class="k">class</span> <span class="nc">UnivariateFeatureSelector</span><span class="p">(</span> | 
|  | <span class="n">JavaEstimator</span><span class="p">[</span><span class="s2">"UnivariateFeatureSelectorModel"</span><span class="p">],</span> | 
|  | <span class="n">_UnivariateFeatureSelectorParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"UnivariateFeatureSelector"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    UnivariateFeatureSelector</span> | 
|  | <span class="sd">    Feature selector based on univariate statistical tests against labels. Currently, Spark</span> | 
|  | <span class="sd">    supports three Univariate Feature Selectors: chi-squared, ANOVA F-test and F-value.</span> | 
|  | <span class="sd">    Users can choose a Univariate Feature Selector by setting `featureType` and `labelType`,</span> | 
|  | <span class="sd">    and Spark will pick the score function based on the specified `featureType` and `labelType`.</span> | 
|  |  | 
|  | <span class="sd">    The following combinations of `featureType` and `labelType` are supported:</span> | 
|  |  | 
|  | <span class="sd">    - `featureType` `categorical` and `labelType` `categorical`, Spark uses chi-squared,</span> | 
|  | <span class="sd">      i.e. chi2 in sklearn.</span> | 
|  | <span class="sd">    - `featureType` `continuous` and `labelType` `categorical`, Spark uses ANOVA F-test,</span> | 
|  | <span class="sd">      i.e. f_classif in sklearn.</span> | 
|  | <span class="sd">    - `featureType` `continuous` and `labelType` `continuous`, Spark uses F-value,</span> | 
|  | <span class="sd">      i.e. f_regression in sklearn.</span> | 
|  |  | 
|  | <span class="sd">    The `UnivariateFeatureSelector` supports different selection modes: `numTopFeatures`,</span> | 
|  | <span class="sd">    `percentile`, `fpr`, `fdr`, `fwe`.</span> | 
|  |  | 
|  | <span class="sd">    - `numTopFeatures` chooses a fixed number of top features according to a</span> | 
|  | <span class="sd">      hypothesis.</span> | 
|  | <span class="sd">    - `percentile` is similar but chooses a fraction of all features</span> | 
|  | <span class="sd">      instead of a fixed number.</span> | 
|  | <span class="sd">    - `fpr` chooses all features whose p-values are below a threshold,</span> | 
|  | <span class="sd">      thus controlling the false positive rate of selection.</span> | 
|  | <span class="sd">    - `fdr` uses the `Benjamini-Hochberg procedure \</span> | 
|  | <span class="sd">      <https://en.wikipedia.org/wiki/False_discovery_rate#Benjamini.E2.80.93Hochberg_procedure>`_</span> | 
|  | <span class="sd">      to choose all features whose false discovery rate is below a threshold.</span> | 
|  | <span class="sd">    - `fwe` chooses all features whose p-values are below a threshold. The threshold is scaled by</span> | 
|  | <span class="sd">      1 / `numFeatures`, thus controlling the family-wise error rate of selection.</span> | 
|  |  | 
|  | <span class="sd">    By default, the selection mode is `numTopFeatures`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.1.1</span> | 
|  |  | 
|  | <span class="sd">    Examples</span> | 
|  | <span class="sd">    --------</span> | 
|  | <span class="sd">    >>> from pyspark.ml.linalg import Vectors</span> | 
|  | <span class="sd">    >>> df = spark.createDataFrame(</span> | 
|  | <span class="sd">    ...    [(Vectors.dense([1.7, 4.4, 7.6, 5.8, 9.6, 2.3]), 3.0),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([8.8, 7.3, 5.7, 7.3, 2.2, 4.1]), 2.0),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([1.2, 9.5, 2.5, 3.1, 8.7, 2.5]), 1.0),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([3.7, 9.2, 6.1, 4.1, 7.5, 3.8]), 2.0),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([8.9, 5.2, 7.8, 8.3, 5.2, 3.0]), 4.0),</span> | 
|  | <span class="sd">    ...     (Vectors.dense([7.9, 8.5, 9.2, 4.0, 9.4, 2.1]), 4.0)],</span> | 
|  | <span class="sd">    ...    ["features", "label"])</span> | 
|  | <span class="sd">    >>> selector = UnivariateFeatureSelector(outputCol="selectedFeatures")</span> | 
|  | <span class="sd">    >>> selector.setFeatureType("continuous").setLabelType("categorical").setSelectionThreshold(1)</span> | 
|  | <span class="sd">    UnivariateFeatureSelector...</span> | 
|  | <span class="sd">    >>> model = selector.fit(df)</span> | 
|  | <span class="sd">    >>> model.getFeaturesCol()</span> | 
|  | <span class="sd">    'features'</span> | 
|  | <span class="sd">    >>> model.setFeaturesCol("features")</span> | 
|  | <span class="sd">    UnivariateFeatureSelectorModel...</span> | 
|  | <span class="sd">    >>> model.transform(df).head().selectedFeatures</span> | 
|  | <span class="sd">    DenseVector([7.6])</span> | 
|  | <span class="sd">    >>> model.selectedFeatures</span> | 
|  | <span class="sd">    [2]</span> | 
|  | <span class="sd">    >>> selectorPath = temp_path + "/selector"</span> | 
|  | <span class="sd">    >>> selector.save(selectorPath)</span> | 
|  | <span class="sd">    >>> loadedSelector = UnivariateFeatureSelector.load(selectorPath)</span> | 
|  | <span class="sd">    >>> loadedSelector.getSelectionThreshold() == selector.getSelectionThreshold()</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> modelPath = temp_path + "/selector-model"</span> | 
|  | <span class="sd">    >>> model.save(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel = UnivariateFeatureSelectorModel.load(modelPath)</span> | 
|  | <span class="sd">    >>> loadedModel.selectedFeatures == model.selectedFeatures</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    >>> loadedModel.transform(df).take(1) == model.transform(df).take(1)</span> | 
|  | <span class="sd">    True</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> | 
|  |  | 
|  | <span class="nd">@keyword_only</span> | 
|  | <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> | 
|  | <span class="n">selectionMode</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"numTopFeatures"</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        __init__(self, \\*, featuresCol="features", outputCol=None, \</span> | 
|  | <span class="sd">                 labelCol="label", selectionMode="numTopFeatures")</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="nb">super</span><span class="p">(</span><span class="n">UnivariateFeatureSelector</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> | 
|  | <span class="s2">"org.apache.spark.ml.feature.UnivariateFeatureSelector"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> | 
|  |  | 
|  | <div class="viewcode-block" id="UnivariateFeatureSelector.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.UnivariateFeatureSelector.html#pyspark.ml.feature.UnivariateFeatureSelector.setParams">[docs]</a>    <span class="nd">@keyword_only</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.1"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> | 
|  | <span class="bp">self</span><span class="p">,</span> | 
|  | <span class="o">*</span><span class="p">,</span> | 
|  | <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> | 
|  | <span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> | 
|  | <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> | 
|  | <span class="n">selectionMode</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"numTopFeatures"</span><span class="p">,</span> | 
|  | <span class="p">)</span> <span class="o">-></span> <span class="s2">"UnivariateFeatureSelector"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        setParams(self, \\*, featuresCol="features", outputCol=None, \</span> | 
|  | <span class="sd">                  labelCol="label", selectionMode="numTopFeatures")</span> | 
|  | <span class="sd">        Sets params for this UnivariateFeatureSelector.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="UnivariateFeatureSelector.setFeatureType"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.UnivariateFeatureSelector.html#pyspark.ml.feature.UnivariateFeatureSelector.setFeatureType">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.1"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFeatureType</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"UnivariateFeatureSelector"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featureType`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featureType</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="UnivariateFeatureSelector.setLabelType"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.UnivariateFeatureSelector.html#pyspark.ml.feature.UnivariateFeatureSelector.setLabelType">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.1"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setLabelType</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"UnivariateFeatureSelector"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`labelType`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">labelType</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="UnivariateFeatureSelector.setSelectionMode"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.UnivariateFeatureSelector.html#pyspark.ml.feature.UnivariateFeatureSelector.setSelectionMode">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.1"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSelectionMode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"UnivariateFeatureSelector"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`selectionMode`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">selectionMode</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="UnivariateFeatureSelector.setSelectionThreshold"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.UnivariateFeatureSelector.html#pyspark.ml.feature.UnivariateFeatureSelector.setSelectionThreshold">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.1"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setSelectionThreshold</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"UnivariateFeatureSelector"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`selectionThreshold`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">selectionThreshold</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="UnivariateFeatureSelector.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.UnivariateFeatureSelector.html#pyspark.ml.feature.UnivariateFeatureSelector.setFeaturesCol">[docs]</a>    <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"UnivariateFeatureSelector"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="UnivariateFeatureSelector.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.UnivariateFeatureSelector.html#pyspark.ml.feature.UnivariateFeatureSelector.setOutputCol">[docs]</a>    <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"UnivariateFeatureSelector"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="UnivariateFeatureSelector.setLabelCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.UnivariateFeatureSelector.html#pyspark.ml.feature.UnivariateFeatureSelector.setLabelCol">[docs]</a>    <span class="k">def</span> <span class="nf">setLabelCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"UnivariateFeatureSelector"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`labelCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">labelCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"UnivariateFeatureSelectorModel"</span><span class="p">:</span> | 
|  | <span class="k">return</span> <span class="n">UnivariateFeatureSelectorModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <div class="viewcode-block" id="UnivariateFeatureSelectorModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.UnivariateFeatureSelectorModel.html#pyspark.ml.feature.UnivariateFeatureSelectorModel">[docs]</a><span class="k">class</span> <span class="nc">UnivariateFeatureSelectorModel</span><span class="p">(</span> | 
|  | <span class="n">JavaModel</span><span class="p">,</span> | 
|  | <span class="n">_UnivariateFeatureSelectorParams</span><span class="p">,</span> | 
|  | <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"UnivariateFeatureSelectorModel"</span><span class="p">],</span> | 
|  | <span class="n">JavaMLWritable</span><span class="p">,</span> | 
|  | <span class="p">):</span> | 
|  | <span class="w">    </span><span class="sd">"""</span> | 
|  | <span class="sd">    Model fitted by :py:class:`UnivariateFeatureSelector`.</span> | 
|  |  | 
|  | <span class="sd">    .. versionadded:: 3.1.1</span> | 
|  | <span class="sd">    """</span> | 
|  |  | 
|  | <div class="viewcode-block" id="UnivariateFeatureSelectorModel.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.UnivariateFeatureSelectorModel.html#pyspark.ml.feature.UnivariateFeatureSelectorModel.setFeaturesCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.1"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"UnivariateFeatureSelectorModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`featuresCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <div class="viewcode-block" id="UnivariateFeatureSelectorModel.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.feature.UnivariateFeatureSelectorModel.html#pyspark.ml.feature.UnivariateFeatureSelectorModel.setOutputCol">[docs]</a>    <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.1"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"UnivariateFeatureSelectorModel"</span><span class="p">:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        Sets the value of :py:attr:`outputCol`.</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> | 
|  |  | 
|  | <span class="nd">@property</span> | 
|  | <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.1"</span><span class="p">)</span> | 
|  | <span class="k">def</span> <span class="nf">selectedFeatures</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span> | 
|  | <span class="w">        </span><span class="sd">"""</span> | 
|  | <span class="sd">        List of indices to select (filter).</span> | 
|  | <span class="sd">        """</span> | 
|  | <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"selectedFeatures"</span><span class="p">)</span></div> | 
|  |  | 
|  |  | 
|  | <span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span> | 
|  | <span class="kn">import</span> <span class="nn">doctest</span> | 
|  | <span class="kn">import</span> <span class="nn">sys</span> | 
|  | <span class="kn">import</span> <span class="nn">tempfile</span> | 
|  |  | 
|  | <span class="kn">import</span> <span class="nn">pyspark.ml.feature</span> | 
|  | <span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">Row</span><span class="p">,</span> <span class="n">SparkSession</span> | 
|  |  | 
|  | <span class="n">globs</span> <span class="o">=</span> <span class="nb">globals</span><span class="p">()</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> | 
|  | <span class="n">features</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">ml</span><span class="o">.</span><span class="n">feature</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> | 
|  | <span class="n">globs</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">features</span><span class="p">)</span> | 
|  |  | 
|  | <span class="c1"># The small batch size here ensures that we see multiple batches,</span> | 
|  | <span class="c1"># even in these small test examples:</span> | 
|  | <span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">"local[2]"</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">"ml.feature tests"</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span> | 
|  | <span class="n">sc</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">sparkContext</span> | 
|  | <span class="n">globs</span><span class="p">[</span><span class="s2">"sc"</span><span class="p">]</span> <span class="o">=</span> <span class="n">sc</span> | 
|  | <span class="n">globs</span><span class="p">[</span><span class="s2">"spark"</span><span class="p">]</span> <span class="o">=</span> <span class="n">spark</span> | 
|  | <span class="n">testData</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span> | 
|  | <span class="p">[</span> | 
|  | <span class="n">Row</span><span class="p">(</span><span class="nb">id</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">"a"</span><span class="p">),</span> | 
|  | <span class="n">Row</span><span class="p">(</span><span class="nb">id</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">"b"</span><span class="p">),</span> | 
|  | <span class="n">Row</span><span class="p">(</span><span class="nb">id</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">"c"</span><span class="p">),</span> | 
|  | <span class="n">Row</span><span class="p">(</span><span class="nb">id</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">"a"</span><span class="p">),</span> | 
|  | <span class="n">Row</span><span class="p">(</span><span class="nb">id</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">"a"</span><span class="p">),</span> | 
|  | <span class="n">Row</span><span class="p">(</span><span class="nb">id</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="s2">"c"</span><span class="p">),</span> | 
|  | <span class="p">],</span> | 
|  | <span class="mi">2</span><span class="p">,</span> | 
|  | <span class="p">)</span> | 
|  | <span class="n">globs</span><span class="p">[</span><span class="s2">"stringIndDf"</span><span class="p">]</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">testData</span><span class="p">)</span> | 
|  | <span class="n">temp_path</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">mkdtemp</span><span class="p">()</span> | 
|  | <span class="n">globs</span><span class="p">[</span><span class="s2">"temp_path"</span><span class="p">]</span> <span class="o">=</span> <span class="n">temp_path</span> | 
|  | <span class="k">try</span><span class="p">:</span> | 
|  | <span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span><span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span> <span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span><span class="p">)</span> | 
|  | <span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span> | 
|  | <span class="k">finally</span><span class="p">:</span> | 
|  | <span class="kn">from</span> <span class="nn">shutil</span> <span class="kn">import</span> <span class="n">rmtree</span> | 
|  |  | 
|  | <span class="k">try</span><span class="p">:</span> | 
|  | <span class="n">rmtree</span><span class="p">(</span><span class="n">temp_path</span><span class="p">)</span> | 
|  | <span class="k">except</span> <span class="ne">OSError</span><span class="p">:</span> | 
|  | <span class="k">pass</span> | 
|  | <span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span> | 
|  | <span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> | 
|  | </pre></div> | 
|  |  | 
|  | </article> | 
|  |  | 
|  |  | 
|  |  | 
|  | <footer class="bd-footer-article"> | 
|  |  | 
|  | <div class="footer-article-items footer-article__inner"> | 
|  |  | 
|  | <div class="footer-article-item"><!-- Previous / next buttons --> | 
|  | <div class="prev-next-area"> | 
|  | </div></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </footer> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  |  | 
|  |  | 
|  | </div> | 
|  | <footer class="bd-footer-content"> | 
|  |  | 
|  | </footer> | 
|  |  | 
|  | </main> | 
|  | </div> | 
|  | </div> | 
|  |  | 
|  | <!-- Scripts loaded after <body> so the DOM is not blocked --> | 
|  | <script src="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52"></script> | 
|  | <script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52"></script> | 
|  |  | 
|  | <footer class="bd-footer"> | 
|  | <div class="bd-footer__inner bd-page-width"> | 
|  |  | 
|  | <div class="footer-items__start"> | 
|  |  | 
|  | <div class="footer-item"><p class="copyright"> | 
|  | Copyright © 2024 The Apache Software Foundation, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>. | 
|  | </p></div> | 
|  |  | 
|  | <div class="footer-item"> | 
|  | <p class="sphinx-version"> | 
|  | Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.5.0. | 
|  | <br/> | 
|  | </p> | 
|  | </div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  |  | 
|  | <div class="footer-items__end"> | 
|  |  | 
|  | <div class="footer-item"><p class="theme-version"> | 
|  | Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.13.3. | 
|  | </p></div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </div> | 
|  |  | 
|  | </footer> | 
|  | </body> | 
|  | </html> |