| |
| |
| <!DOCTYPE html> |
| |
| <!-- lang declares the language of the page text so assistive technology and translation tools handle it correctly. --> |
| <html lang="en"> |
| |
| <head> |
| <meta charset="utf-8" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
| <title>pyspark.ml.regression — PySpark 4.0.0-preview1 documentation</title> |
| |
| |
| |
| <script data-cfasync="false"> |
| // Copy the saved "mode" and "theme" values from localStorage onto the root |
| // element's data-* attributes, defaulting to "" and "light" respectively. |
| (function (rootData, store) { |
|   rootData.mode = store.getItem("mode") || ""; |
|   rootData.theme = store.getItem("theme") || "light"; |
| })(document.documentElement.dataset, localStorage); |
| </script> |
| |
| <!-- Theme stylesheets: loaded before the other Sphinx assets --> |
| <link rel="stylesheet" href="../../../_static/styles/theme.css?digest=e353d410970836974a52"> |
| <link rel="stylesheet" href="../../../_static/styles/bootstrap.css?digest=e353d410970836974a52"> |
| <link rel="stylesheet" href="../../../_static/styles/pydata-sphinx-theme.css?digest=e353d410970836974a52"> |
| |
| <!-- Font Awesome stylesheet plus preloads for its three webfont files --> |
| <link rel="stylesheet" href="../../../_static/vendor/fontawesome/6.1.2/css/all.min.css?digest=e353d410970836974a52"> |
| <link rel="preload" href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.woff2" as="font" type="font/woff2" crossorigin> |
| <link rel="preload" href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.woff2" as="font" type="font/woff2" crossorigin> |
| <link rel="preload" href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.woff2" as="font" type="font/woff2" crossorigin> |
| |
| <!-- Extension and project stylesheets --> |
| <link rel="stylesheet" href="../../../_static/pygments.css"> |
| <link rel="stylesheet" href="../../../_static/copybutton.css"> |
| <link rel="stylesheet" href="../../../_static/css/pyspark.css"> |
| |
| <!-- Pre-loaded scripts that we'll load fully later --> |
| <link rel="preload" href="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52" as="script"> |
| <link rel="preload" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52" as="script"> |
| |
| <!-- Sphinx runtime scripts; order matters because later scripts use globals defined by earlier ones (DOCUMENTATION_OPTIONS is set on the line after require.js). --> |
| <script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script> |
| <script src="../../../_static/jquery.js"></script> |
| <script src="../../../_static/underscore.js"></script> |
| <script src="../../../_static/doctools.js"></script> |
| <script src="../../../_static/clipboard.min.js"></script> |
| <script src="../../../_static/copybutton.js"></script> |
| <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script> |
| <script>DOCUMENTATION_OPTIONS.pagename = '_modules/pyspark/ml/regression';</script> |
| <link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/_modules/pyspark/ml/regression.html" /> |
| <link rel="search" title="Search" href="../../../search.html" /> |
| <!-- The viewport is already declared next to the charset meta at the top of the head, so the second, equivalent declaration is dropped here. --> |
| <!-- The language tag was the literal string "None"; the page text is English. --> |
| <meta name="docsearch:language" content="en"> |
| |
| |
| <!-- Matomo --> |
| <script> |
| var _paq = window._paq = window._paq || []; |
| /* Commands are queued in order; tracker methods like "setCustomDimension" should be called before "trackPageView" */ |
| _paq.push(["disableCookies"]); |
| _paq.push(["trackPageView"]); |
| _paq.push(["enableLinkTracking"]); |
| (function () { |
|   var matomoBase = "https://analytics.apache.org/"; |
|   _paq.push(["setTrackerUrl", matomoBase + "matomo.php"]); |
|   _paq.push(["setSiteId", "40"]); |
|   // Insert the async tracker script ahead of the first script element in the document. |
|   var doc = document; |
|   var loader = doc.createElement("script"); |
|   var firstScript = doc.getElementsByTagName("script")[0]; |
|   loader.async = true; |
|   loader.src = matomoBase + "matomo.js"; |
|   firstScript.parentNode.insertBefore(loader, firstScript); |
| })(); |
| </script> |
| <!-- End Matomo Code --> |
| |
| </head> |
| |
| |
| <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode=""> |
| |
| |
| |
| <!-- Keyboard shortcut past the header to the element with id "main-content" --> |
| <a href="#main-content" class="skip-link">Skip to main content</a> |
| |
| <!-- Checkbox and overlay label for "__primary"; presumably the theme CSS keys the sidebar state off the checkbox (confirm against the theme stylesheet) --> |
| <input type="checkbox" class="sidebar-toggle" name="__primary" id="__primary"> |
| <label for="__primary" class="overlay overlay-primary"></label> |
| |
| <!-- Same pairing for "__secondary" --> |
| <input type="checkbox" class="sidebar-toggle" name="__secondary" id="__secondary"> |
| <label for="__secondary" class="overlay overlay-secondary"></label> |
| |
| <!-- Search overlay: a GET form that submits the "q" field to the Sphinx search page --> |
| <div class="search-button__wrapper"> |
| <div class="search-button__overlay"></div> |
| <div class="search-button__search-container"> |
| <form class="bd-search d-flex align-items-center" method="get" action="../../../search.html"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| <input class="form-control" id="search-input" name="q" type="search" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"> |
| <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span> |
| </form></div> |
| </div> |
| |
| <nav class="bd-header navbar navbar-expand-lg bd-navbar"> |
| <div class="bd-header__inner bd-page-width"> |
| <label class="sidebar-toggle primary-toggle" for="__primary"> |
| <span class="fa-solid fa-bars"></span> |
| </label> |
| |
| <div class="navbar-header-items__start"> |
| |
| <div class="navbar-item"> |
| <!-- Brand link to the docs homepage. The logo is the link's only content, so its alt text names the destination instead of the generic "Logo image". The dark variant is written by script and carries the same alt text. --> |
| <a class="navbar-brand logo" href="../../../index.html"> |
| <img src="../../../_static/spark-logo-light.png" class="logo__image only-light" alt="PySpark 4.0.0-preview1 documentation - Home"/> |
| <script>document.write(`<img src="../../../_static/spark-logo-dark.png" class="logo__image only-dark" alt="PySpark 4.0.0-preview1 documentation - Home"/>`);</script> |
| </a></div> |
| |
| </div> |
| |
| |
| <div class="col-lg-9 navbar-header-items"> |
| |
| <div class="me-auto navbar-header-items__center"> |
| |
| <div class="navbar-item"><nav class="navbar-nav"> |
| <!-- Header copy of the top-level site sections --> |
| <p class="sidebar-header-items__title" role="heading" aria-level="1" aria-label="Site Navigation">Site Navigation</p> |
| <ul class="bd-navbar-elements navbar-nav"> |
| <li class="nav-item"><a class="nav-link nav-internal" href="../../../index.html">Overview</a></li> |
| <li class="nav-item"><a class="nav-link nav-internal" href="../../../getting_started/index.html">Getting Started</a></li> |
| <li class="nav-item"><a class="nav-link nav-internal" href="../../../user_guide/index.html">User Guides</a></li> |
| <li class="nav-item"><a class="nav-link nav-internal" href="../../../reference/index.html">API Reference</a></li> |
| <li class="nav-item"><a class="nav-link nav-internal" href="../../../development/index.html">Development</a></li> |
| <li class="nav-item"><a class="nav-link nav-internal" href="../../../migration_guide/index.html">Migration Guides</a></li> |
| </ul> |
| </nav></div> |
| |
| </div> |
| |
| |
| <div class="navbar-header-items__end"> |
| |
| <div class="navbar-item navbar-persistent--container"> |
| |
| <!-- The search button is emitted with document.write, so it only exists when JavaScript runs. --> |
| <script> |
| document.write(` |
| <button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| </button> |
| `); |
| </script> |
| </div> |
| |
| |
| <div class="navbar-item"><!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <!-- Version switcher: the button shows this build's version; the menu below starts empty and is filled by the script that follows. --> |
| <!-- NOTE(review): the toggle uses data-toggle while other controls on this page use data-bs-* attributes; confirm the dropdown opens with the bundled Bootstrap. --> |
| <!-- NOTE(review): this markup, including its id values, appears twice in the page; ids are expected to be unique per document. --> |
| <div id="version-button" class="dropdown"> |
| <button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown"> |
| 4.0.0-preview1 |
| <span class="caret"></span> |
| </button> |
| <div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div> |
| |
| <script type="text/javascript"> |
| // Build the docs-homepage URL for one version entry by substituting |
| // entry.version for the "{version}" placeholder in the URL template. |
| function buildURL(entry) { |
|   var homepageTemplate = "https://spark.apache.org/docs/{version}/api/python/index.html"; // supplied by jinja |
|   return homepageTemplate.replace("{version}", entry.version); |
| } |
| |
| // Function to check if corresponding page path exists in other version of docs |
| // and, if so, go there instead of the homepage of the other docs version. |
| // |
| // event: click event from a version link whose href is the other version's |
| //        docs homepage (a URL ending in "index.html", as built by buildURL). |
| // Returns false so the link's default navigation is cancelled; the actual |
| // navigation happens from the AJAX callbacks below. |
| function checkPageExistsAndRedirect(event) { |
|   const currentFilePath = "_modules/pyspark/ml/regression.html"; |
|   const link = event.currentTarget || event.target; |
|   const otherDocsHomepage = link.getAttribute("href"); |
|   // The homepage URL ends in "index.html". Appending the page path to it |
|   // directly produced ".../index.html_modules/...", which never exists, so |
|   // the probe always failed. Build the candidate from the directory instead. |
|   let docsRoot = otherDocsHomepage.replace(/index\.html$/, ""); |
|   if (docsRoot !== "" && !docsRoot.endsWith("/")) { |
|     docsRoot += "/"; |
|   } |
|   const tryUrl = docsRoot + currentFilePath; |
|   $.ajax({ type: "HEAD", url: tryUrl }) |
|     // if the page exists, go there |
|     .done(function () { |
|       location.href = tryUrl; |
|     }) |
|     // otherwise fall back to the other version's homepage |
|     .fail(function () { |
|       location.href = otherDocsHomepage; |
|     }); |
|   return false; |
| } |
| |
| // Function to populate the version switcher |
| (function () { |
|   // The switcher markup appears twice in this page with the same id, and an |
|   // id lookup only ever reaches the first copy. Resolve the dropdown that sits |
|   // next to this script instead. document.currentScript is only set while the |
|   // script is executing, so it is captured here, before the async request. |
|   var ownScript = document.currentScript; |
|   var container = ownScript && ownScript.parentElement; |
|   var menu = (container && container.querySelector(".dropdown-menu")) || document.getElementById("version_switcher"); |
|   if (!menu) { |
|     return; |
|   } |
|   // get JSON config |
|   $.getJSON("https://spark.apache.org/static/versions.json", function (data) { |
|     // create the nodes first (before AJAX calls) to ensure the order is |
|     // correct (for now, links will go to doc version homepage) |
|     $.each(data, function (index, entry) { |
|       // if no custom name specified (e.g., "latest"), use version string |
|       if (!("name" in entry)) { |
|         entry.name = entry.version; |
|       } |
|       // construct the appropriate URL, and add it to the dropdown |
|       entry.url = buildURL(entry); |
|       const node = document.createElement("a"); |
|       node.setAttribute("class", "list-group-item list-group-item-action py-1"); |
|       node.setAttribute("href", `${entry.url}`); |
|       node.textContent = `${entry.name}`; |
|       node.onclick = checkPageExistsAndRedirect; |
|       menu.appendChild(node); |
|     }); |
|   }); |
| })(); |
| </script></div> |
| |
| <div class="navbar-item"> |
| <!-- The theme switch button is emitted with document.write, so it only exists when JavaScript runs. It holds one icon span per data-mode value: light, dark, auto. --> |
| <script> |
| document.write(` |
| <button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span> |
| <span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span> |
| <span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span> |
| </button> |
| `); |
| </script></div> |
| |
| <!-- External project links. The visually hidden text is a span: a label element is interactive content, which is not allowed inside a link, and it had no form control to label. Icons are decorative and hidden from assistive technology. --> |
| <div class="navbar-item"><ul class="navbar-icon-links navbar-nav" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| <a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github" aria-hidden="true"></i></span> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| <a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box" aria-hidden="true"></i></span> |
| <span class="sr-only">PyPI</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| |
| <div class="navbar-persistent--mobile"> |
| <!-- The search button is emitted with document.write, so it only exists when JavaScript runs. --> |
| <script> |
| document.write(` |
| <button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <i class="fa-solid fa-magnifying-glass"></i> |
| </button> |
| `); |
| </script> |
| </div> |
| |
| |
| |
| </div> |
| |
| </nav> |
| |
| <div class="bd-container"> |
| <div class="bd-container__inner bd-page-width"> |
| |
| <div class="bd-sidebar-primary bd-sidebar hide-on-wide"> |
| |
| |
| |
| <div class="sidebar-header-items sidebar-primary__section"> |
| |
| |
| <div class="sidebar-header-items__center"> |
| |
| <div class="navbar-item"><nav class="navbar-nav"> |
| <!-- Sidebar copy of the top-level site sections --> |
| <p class="sidebar-header-items__title" role="heading" aria-level="1" aria-label="Site Navigation">Site Navigation</p> |
| <ul class="bd-navbar-elements navbar-nav"> |
| <li class="nav-item"><a class="nav-link nav-internal" href="../../../index.html">Overview</a></li> |
| <li class="nav-item"><a class="nav-link nav-internal" href="../../../getting_started/index.html">Getting Started</a></li> |
| <li class="nav-item"><a class="nav-link nav-internal" href="../../../user_guide/index.html">User Guides</a></li> |
| <li class="nav-item"><a class="nav-link nav-internal" href="../../../reference/index.html">API Reference</a></li> |
| <li class="nav-item"><a class="nav-link nav-internal" href="../../../development/index.html">Development</a></li> |
| <li class="nav-item"><a class="nav-link nav-internal" href="../../../migration_guide/index.html">Migration Guides</a></li> |
| </ul> |
| </nav></div> |
| |
| </div> |
| |
| |
| |
| <div class="sidebar-header-items__end"> |
| |
| <div class="navbar-item"><!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <!-- Version switcher: the button shows this build's version; the menu below starts empty and is filled by the script that follows. --> |
| <!-- NOTE(review): the toggle uses data-toggle while other controls on this page use data-bs-* attributes; confirm the dropdown opens with the bundled Bootstrap. --> |
| <!-- NOTE(review): this markup, including its id values, appears twice in the page; ids are expected to be unique per document. --> |
| <div id="version-button" class="dropdown"> |
| <button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown"> |
| 4.0.0-preview1 |
| <span class="caret"></span> |
| </button> |
| <div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button"> |
| <!-- dropdown will be populated by javascript on page load --> |
| </div> |
| </div> |
| |
| <script type="text/javascript"> |
| // Build the docs-homepage URL for one version entry by substituting |
| // entry.version for the "{version}" placeholder in the URL template. |
| function buildURL(entry) { |
|   var homepageTemplate = "https://spark.apache.org/docs/{version}/api/python/index.html"; // supplied by jinja |
|   return homepageTemplate.replace("{version}", entry.version); |
| } |
| |
| // Function to check if corresponding page path exists in other version of docs |
| // and, if so, go there instead of the homepage of the other docs version. |
| // |
| // event: click event from a version link whose href is the other version's |
| //        docs homepage (a URL ending in "index.html", as built by buildURL). |
| // Returns false so the link's default navigation is cancelled; the actual |
| // navigation happens from the AJAX callbacks below. |
| function checkPageExistsAndRedirect(event) { |
|   const currentFilePath = "_modules/pyspark/ml/regression.html"; |
|   const link = event.currentTarget || event.target; |
|   const otherDocsHomepage = link.getAttribute("href"); |
|   // The homepage URL ends in "index.html". Appending the page path to it |
|   // directly produced ".../index.html_modules/...", which never exists, so |
|   // the probe always failed. Build the candidate from the directory instead. |
|   let docsRoot = otherDocsHomepage.replace(/index\.html$/, ""); |
|   if (docsRoot !== "" && !docsRoot.endsWith("/")) { |
|     docsRoot += "/"; |
|   } |
|   const tryUrl = docsRoot + currentFilePath; |
|   $.ajax({ type: "HEAD", url: tryUrl }) |
|     // if the page exists, go there |
|     .done(function () { |
|       location.href = tryUrl; |
|     }) |
|     // otherwise fall back to the other version's homepage |
|     .fail(function () { |
|       location.href = otherDocsHomepage; |
|     }); |
|   return false; |
| } |
| |
| // Function to populate the version switcher |
| (function () { |
|   // The switcher markup appears twice in this page with the same id, and an |
|   // id lookup only ever reaches the first copy. Resolve the dropdown that sits |
|   // next to this script instead. document.currentScript is only set while the |
|   // script is executing, so it is captured here, before the async request. |
|   var ownScript = document.currentScript; |
|   var container = ownScript && ownScript.parentElement; |
|   var menu = (container && container.querySelector(".dropdown-menu")) || document.getElementById("version_switcher"); |
|   if (!menu) { |
|     return; |
|   } |
|   // get JSON config |
|   $.getJSON("https://spark.apache.org/static/versions.json", function (data) { |
|     // create the nodes first (before AJAX calls) to ensure the order is |
|     // correct (for now, links will go to doc version homepage) |
|     $.each(data, function (index, entry) { |
|       // if no custom name specified (e.g., "latest"), use version string |
|       if (!("name" in entry)) { |
|         entry.name = entry.version; |
|       } |
|       // construct the appropriate URL, and add it to the dropdown |
|       entry.url = buildURL(entry); |
|       const node = document.createElement("a"); |
|       node.setAttribute("class", "list-group-item list-group-item-action py-1"); |
|       node.setAttribute("href", `${entry.url}`); |
|       node.textContent = `${entry.name}`; |
|       node.onclick = checkPageExistsAndRedirect; |
|       menu.appendChild(node); |
|     }); |
|   }); |
| })(); |
| </script></div> |
| |
| <div class="navbar-item"> |
| <!-- The theme switch button is emitted with document.write, so it only exists when JavaScript runs. It holds one icon span per data-mode value: light, dark, auto. --> |
| <script> |
| document.write(` |
| <button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip"> |
| <span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span> |
| <span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span> |
| <span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span> |
| </button> |
| `); |
| </script></div> |
| |
| <!-- External project links. The visually hidden text is a span: a label element is interactive content, which is not allowed inside a link, and it had no form control to label. Icons are decorative and hidden from assistive technology. --> |
| <div class="navbar-item"><ul class="navbar-icon-links navbar-nav" |
| aria-label="Icon Links"> |
| <li class="nav-item"> |
| <a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github" aria-hidden="true"></i></span> |
| <span class="sr-only">GitHub</span></a> |
| </li> |
| <li class="nav-item"> |
| <a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box" aria-hidden="true"></i></span> |
| <span class="sr-only">PyPI</span></a> |
| </li> |
| </ul></div> |
| |
| </div> |
| |
| </div> |
| |
| |
| <div class="sidebar-primary-items__end sidebar-primary__section"> |
| </div> |
| |
| <div id="rtd-footer-container"></div> |
| |
| |
| </div> |
| |
| <main id="main-content" class="bd-main"> |
| |
| |
| <div class="bd-content"> |
| <div class="bd-article-container"> |
| |
| <div class="bd-header-article"> |
| <div class="header-article-items header-article__inner"> |
| |
| <div class="header-article-items__start"> |
| |
| <div class="header-article-item"> |
| |
| |
| |
| <!-- Breadcrumb trail. The nav element is the landmark; the list keeps its native list semantics, so it carries no role or label of its own. --> |
| <nav aria-label="Breadcrumbs"> |
| <ul class="bd-breadcrumbs"> |
| |
| <li class="breadcrumb-item breadcrumb-home"> |
| <a href="../../../index.html" class="nav-link" aria-label="Home"> |
| <i class="fa-solid fa-home" aria-hidden="true"></i> |
| </a> |
| </li> |
| |
| <li class="breadcrumb-item"><a href="../../index.html" class="nav-link">Module code</a></li> |
| |
| <li class="breadcrumb-item active" aria-current="page">pyspark.ml.regression</li> |
| </ul> |
| </nav> |
| </div> |
| |
| </div> |
| |
| |
| </div> |
| </div> |
| |
| |
| |
| |
| <div id="searchbox"></div> |
| <article class="bd-article" role="main"> |
| |
| <h1>Source code for pyspark.ml.regression</h1><div class="highlight"><pre> |
| <span></span><span class="c1">#</span> |
| <span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span> |
| <span class="c1"># contributor license agreements. See the NOTICE file distributed with</span> |
| <span class="c1"># this work for additional information regarding copyright ownership.</span> |
| <span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span> |
| <span class="c1"># (the "License"); you may not use this file except in compliance with</span> |
| <span class="c1"># the License. You may obtain a copy of the License at</span> |
| <span class="c1">#</span> |
| <span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span> |
| <span class="c1">#</span> |
| <span class="c1"># Unless required by applicable law or agreed to in writing, software</span> |
| <span class="c1"># distributed under the License is distributed on an "AS IS" BASIS,</span> |
| <span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span> |
| <span class="c1"># See the License for the specific language governing permissions and</span> |
| <span class="c1"># limitations under the License.</span> |
| <span class="c1">#</span> |
| |
| <span class="kn">import</span> <span class="nn">sys</span> |
| <span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">Generic</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">TypeVar</span><span class="p">,</span> <span class="n">TYPE_CHECKING</span> |
| <span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span> |
| |
| <span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">keyword_only</span><span class="p">,</span> <span class="n">since</span> |
| <span class="kn">from</span> <span class="nn">pyspark.ml</span> <span class="kn">import</span> <span class="n">Predictor</span><span class="p">,</span> <span class="n">PredictionModel</span> |
| <span class="kn">from</span> <span class="nn">pyspark.ml.base</span> <span class="kn">import</span> <span class="n">_PredictorParams</span> |
| <span class="kn">from</span> <span class="nn">pyspark.ml.param.shared</span> <span class="kn">import</span> <span class="p">(</span> |
| <span class="n">HasFeaturesCol</span><span class="p">,</span> |
| <span class="n">HasLabelCol</span><span class="p">,</span> |
| <span class="n">HasPredictionCol</span><span class="p">,</span> |
| <span class="n">HasWeightCol</span><span class="p">,</span> |
| <span class="n">Param</span><span class="p">,</span> |
| <span class="n">Params</span><span class="p">,</span> |
| <span class="n">TypeConverters</span><span class="p">,</span> |
| <span class="n">HasMaxIter</span><span class="p">,</span> |
| <span class="n">HasTol</span><span class="p">,</span> |
| <span class="n">HasFitIntercept</span><span class="p">,</span> |
| <span class="n">HasAggregationDepth</span><span class="p">,</span> |
| <span class="n">HasMaxBlockSizeInMB</span><span class="p">,</span> |
| <span class="n">HasRegParam</span><span class="p">,</span> |
| <span class="n">HasSolver</span><span class="p">,</span> |
| <span class="n">HasStepSize</span><span class="p">,</span> |
| <span class="n">HasSeed</span><span class="p">,</span> |
| <span class="n">HasElasticNetParam</span><span class="p">,</span> |
| <span class="n">HasStandardization</span><span class="p">,</span> |
| <span class="n">HasLoss</span><span class="p">,</span> |
| <span class="n">HasVarianceCol</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="kn">from</span> <span class="nn">pyspark.ml.tree</span> <span class="kn">import</span> <span class="p">(</span> |
| <span class="n">_DecisionTreeModel</span><span class="p">,</span> |
| <span class="n">_DecisionTreeParams</span><span class="p">,</span> |
| <span class="n">_TreeEnsembleModel</span><span class="p">,</span> |
| <span class="n">_RandomForestParams</span><span class="p">,</span> |
| <span class="n">_GBTParams</span><span class="p">,</span> |
| <span class="n">_TreeRegressorParams</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="kn">from</span> <span class="nn">pyspark.ml.base</span> <span class="kn">import</span> <span class="n">Transformer</span> |
| <span class="kn">from</span> <span class="nn">pyspark.ml.linalg</span> <span class="kn">import</span> <span class="n">Vector</span><span class="p">,</span> <span class="n">Matrix</span> |
| <span class="kn">from</span> <span class="nn">pyspark.ml.util</span> <span class="kn">import</span> <span class="p">(</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">,</span> |
| <span class="n">HasTrainingSummary</span><span class="p">,</span> |
| <span class="n">GeneralJavaMLWritable</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="kn">from</span> <span class="nn">pyspark.ml.wrapper</span> <span class="kn">import</span> <span class="p">(</span> |
| <span class="n">JavaEstimator</span><span class="p">,</span> |
| <span class="n">JavaModel</span><span class="p">,</span> |
| <span class="n">JavaPredictor</span><span class="p">,</span> |
| <span class="n">JavaPredictionModel</span><span class="p">,</span> |
| <span class="n">JavaTransformer</span><span class="p">,</span> |
| <span class="n">JavaWrapper</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="kn">from</span> <span class="nn">pyspark.ml.common</span> <span class="kn">import</span> <span class="n">inherit_doc</span> |
| <span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">DataFrame</span> |
| |
| <span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span> |
| <span class="kn">from</span> <span class="nn">py4j.java_gateway</span> <span class="kn">import</span> <span class="n">JavaObject</span> |
| |
| <span class="n">T</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">"T"</span><span class="p">)</span> |
| <span class="n">M</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">"M"</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="n">Transformer</span><span class="p">)</span> |
| <span class="n">JM</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">"JM"</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="n">JavaTransformer</span><span class="p">)</span> |
| |
| |
| <span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span> |
| <span class="s2">"AFTSurvivalRegression"</span><span class="p">,</span> |
| <span class="s2">"AFTSurvivalRegressionModel"</span><span class="p">,</span> |
| <span class="s2">"DecisionTreeRegressor"</span><span class="p">,</span> |
| <span class="s2">"DecisionTreeRegressionModel"</span><span class="p">,</span> |
| <span class="s2">"GBTRegressor"</span><span class="p">,</span> |
| <span class="s2">"GBTRegressionModel"</span><span class="p">,</span> |
| <span class="s2">"GeneralizedLinearRegression"</span><span class="p">,</span> |
| <span class="s2">"GeneralizedLinearRegressionModel"</span><span class="p">,</span> |
| <span class="s2">"GeneralizedLinearRegressionSummary"</span><span class="p">,</span> |
| <span class="s2">"GeneralizedLinearRegressionTrainingSummary"</span><span class="p">,</span> |
| <span class="s2">"IsotonicRegression"</span><span class="p">,</span> |
| <span class="s2">"IsotonicRegressionModel"</span><span class="p">,</span> |
| <span class="s2">"LinearRegression"</span><span class="p">,</span> |
| <span class="s2">"LinearRegressionModel"</span><span class="p">,</span> |
| <span class="s2">"LinearRegressionSummary"</span><span class="p">,</span> |
| <span class="s2">"LinearRegressionTrainingSummary"</span><span class="p">,</span> |
| <span class="s2">"RandomForestRegressor"</span><span class="p">,</span> |
| <span class="s2">"RandomForestRegressionModel"</span><span class="p">,</span> |
| <span class="s2">"FMRegressor"</span><span class="p">,</span> |
| <span class="s2">"FMRegressionModel"</span><span class="p">,</span> |
| <span class="p">]</span> |
| |
| |
| <span class="k">class</span> <span class="nc">Regressor</span><span class="p">(</span><span class="n">Predictor</span><span class="p">[</span><span class="n">M</span><span class="p">],</span> <span class="n">_PredictorParams</span><span class="p">,</span> <span class="n">Generic</span><span class="p">[</span><span class="n">M</span><span class="p">],</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Regressor for regression tasks.</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="k">pass</span> |
| |
| |
| <span class="k">class</span> <span class="nc">RegressionModel</span><span class="p">(</span><span class="n">PredictionModel</span><span class="p">[</span><span class="n">T</span><span class="p">],</span> <span class="n">_PredictorParams</span><span class="p">,</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model produced by a ``Regressor``.</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="k">pass</span> |
| |
| |
| <span class="k">class</span> <span class="nc">_JavaRegressor</span><span class="p">(</span><span class="n">Regressor</span><span class="p">,</span> <span class="n">JavaPredictor</span><span class="p">[</span><span class="n">JM</span><span class="p">],</span> <span class="n">Generic</span><span class="p">[</span><span class="n">JM</span><span class="p">],</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Java Regressor for regression tasks.</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="k">pass</span> |
| |
| |
| <span class="k">class</span> <span class="nc">_JavaRegressionModel</span><span class="p">(</span><span class="n">RegressionModel</span><span class="p">,</span> <span class="n">JavaPredictionModel</span><span class="p">[</span><span class="n">T</span><span class="p">],</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Java Model produced by a ``_JavaRegressor``.</span> |
| <span class="sd"> To be mixed in with :class:`pyspark.ml.JavaModel`</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="k">pass</span> |
| |
| |
| <span class="k">class</span> <span class="nc">_LinearRegressionParams</span><span class="p">(</span> |
| <span class="n">_PredictorParams</span><span class="p">,</span> |
| <span class="n">HasRegParam</span><span class="p">,</span> |
| <span class="n">HasElasticNetParam</span><span class="p">,</span> |
| <span class="n">HasMaxIter</span><span class="p">,</span> |
| <span class="n">HasTol</span><span class="p">,</span> |
| <span class="n">HasFitIntercept</span><span class="p">,</span> |
| <span class="n">HasStandardization</span><span class="p">,</span> |
| <span class="n">HasWeightCol</span><span class="p">,</span> |
| <span class="n">HasSolver</span><span class="p">,</span> |
| <span class="n">HasAggregationDepth</span><span class="p">,</span> |
| <span class="n">HasLoss</span><span class="p">,</span> |
| <span class="n">HasMaxBlockSizeInMB</span><span class="p">,</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Params for :py:class:`LinearRegression` and :py:class:`LinearRegressionModel`.</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">solver</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"solver"</span><span class="p">,</span> |
| <span class="s2">"The solver algorithm for optimization. Supported "</span> <span class="o">+</span> <span class="s2">"options: auto, normal, l-bfgs."</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="n">loss</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"loss"</span><span class="p">,</span> |
| <span class="s2">"The loss function to be optimized. Supported "</span> <span class="o">+</span> <span class="s2">"options: squaredError, huber."</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="n">epsilon</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"epsilon"</span><span class="p">,</span> |
| <span class="s2">"The shape parameter to control the amount of "</span> |
| <span class="o">+</span> <span class="s2">"robustness. Must be > 1.0. Only valid when loss is huber"</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">_LinearRegressionParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> |
| <span class="n">maxIter</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> |
| <span class="n">regParam</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">tol</span><span class="o">=</span><span class="mf">1e-6</span><span class="p">,</span> |
| <span class="n">loss</span><span class="o">=</span><span class="s2">"squaredError"</span><span class="p">,</span> |
| <span class="n">epsilon</span><span class="o">=</span><span class="mf">1.35</span><span class="p">,</span> |
| <span class="n">maxBlockSizeInMB</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getEpsilon</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of epsilon or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">epsilon</span><span class="p">)</span> |
| |
| |
| <div class="viewcode-block" id="LinearRegression"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression">[docs]</a><span class="nd">@inherit_doc</span> |
| <span class="k">class</span> <span class="nc">LinearRegression</span><span class="p">(</span> |
| <span class="n">_JavaRegressor</span><span class="p">[</span><span class="s2">"LinearRegressionModel"</span><span class="p">],</span> |
| <span class="n">_LinearRegressionParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"LinearRegression"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Linear regression.</span> |
| |
| <span class="sd"> The learning objective is to minimize the specified loss function, with regularization.</span> |
| <span class="sd"> This supports two kinds of loss:</span> |
| |
| <span class="sd"> * squaredError (a.k.a. squared loss)</span> |
| <span class="sd"> * huber (a hybrid of squared error for relatively small errors and absolute error for \</span> |
| <span class="sd"> relatively large ones, and we estimate the scale parameter from training data)</span> |
| |
| <span class="sd"> This supports multiple types of regularization:</span> |
| |
| <span class="sd"> * none (a.k.a. ordinary least squares)</span> |
| <span class="sd"> * L2 (ridge regression)</span> |
| <span class="sd"> * L1 (Lasso)</span> |
| <span class="sd"> * L2 + L1 (elastic net)</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| |
| <span class="sd"> Notes</span> |
| <span class="sd"> -----</span> |
| <span class="sd"> Fitting with huber loss only supports none and L2 regularization.</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.ml.linalg import Vectors</span> |
| <span class="sd"> >>> df = spark.createDataFrame([</span> |
| <span class="sd"> ... (1.0, 2.0, Vectors.dense(1.0)),</span> |
| <span class="sd"> ... (0.0, 2.0, Vectors.sparse(1, [], []))], ["label", "weight", "features"])</span> |
| <span class="sd"> >>> lr = LinearRegression(regParam=0.0, solver="normal", weightCol="weight")</span> |
| <span class="sd"> >>> lr.setMaxIter(5)</span> |
| <span class="sd"> LinearRegression...</span> |
| <span class="sd"> >>> lr.getMaxIter()</span> |
| <span class="sd"> 5</span> |
| <span class="sd"> >>> lr.setRegParam(0.1)</span> |
| <span class="sd"> LinearRegression...</span> |
| <span class="sd"> >>> lr.getRegParam()</span> |
| <span class="sd"> 0.1</span> |
| <span class="sd"> >>> lr.setRegParam(0.0)</span> |
| <span class="sd"> LinearRegression...</span> |
| <span class="sd"> >>> model = lr.fit(df)</span> |
| <span class="sd"> >>> model.setFeaturesCol("features")</span> |
| <span class="sd"> LinearRegressionModel...</span> |
| <span class="sd"> >>> model.setPredictionCol("newPrediction")</span> |
| <span class="sd"> LinearRegressionModel...</span> |
| <span class="sd"> >>> model.getMaxIter()</span> |
| <span class="sd"> 5</span> |
| <span class="sd"> >>> model.getMaxBlockSizeInMB()</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])</span> |
| <span class="sd"> >>> abs(model.predict(test0.head().features) - (-1.0)) &lt; 0.001</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> abs(model.transform(test0).head().newPrediction - (-1.0)) &lt; 0.001</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> abs(model.coefficients[0] - 1.0) &lt; 0.001</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> abs(model.intercept - 0.0) &lt; 0.001</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])</span> |
| <span class="sd"> >>> abs(model.transform(test1).head().newPrediction - 1.0) &lt; 0.001</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> lr.setParams(featuresCol="vector")</span> |
| <span class="sd"> LinearRegression...</span> |
| <span class="sd"> >>> lr_path = temp_path + "/lr"</span> |
| <span class="sd"> >>> lr.save(lr_path)</span> |
| <span class="sd"> >>> lr2 = LinearRegression.load(lr_path)</span> |
| <span class="sd"> >>> lr2.getMaxIter()</span> |
| <span class="sd"> 5</span> |
| <span class="sd"> >>> model_path = temp_path + "/lr_model"</span> |
| <span class="sd"> >>> model.save(model_path)</span> |
| <span class="sd"> >>> model2 = LinearRegressionModel.load(model_path)</span> |
| <span class="sd"> >>> model.coefficients[0] == model2.coefficients[0]</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.intercept == model2.intercept</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.transform(test0).take(1) == model2.transform(test0).take(1)</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.numFeatures</span> |
| <span class="sd"> 1</span> |
| <span class="sd"> >>> model.write().format("pmml").save(model_path + "_2")</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> |
| |
| <span class="nd">@keyword_only</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span> |
| <span class="n">regParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">elasticNetParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">tol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1e-6</span><span class="p">,</span> |
| <span class="n">fitIntercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">standardization</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">solver</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"auto"</span><span class="p">,</span> |
| <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">aggregationDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> |
| <span class="n">loss</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"squaredError"</span><span class="p">,</span> |
| <span class="n">epsilon</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.35</span><span class="p">,</span> |
| <span class="n">maxBlockSizeInMB</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \</span> |
| <span class="sd"> standardization=True, solver="auto", weightCol=None, aggregationDepth=2, \</span> |
| <span class="sd"> loss="squaredError", epsilon=1.35, maxBlockSizeInMB=0.0)</span> |
| <span class="sd"> """</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">LinearRegression</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> |
| <span class="s2">"org.apache.spark.ml.regression.LinearRegression"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> |
| <span class="p">)</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="LinearRegression.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression.setParams">[docs]</a> <span class="nd">@keyword_only</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span> |
| <span class="n">regParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">elasticNetParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">tol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1e-6</span><span class="p">,</span> |
| <span class="n">fitIntercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">standardization</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">solver</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"auto"</span><span class="p">,</span> |
| <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">aggregationDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> |
| <span class="n">loss</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"squaredError"</span><span class="p">,</span> |
| <span class="n">epsilon</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.35</span><span class="p">,</span> |
| <span class="n">maxBlockSizeInMB</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \</span> |
| <span class="sd"> standardization=True, solver="auto", weightCol=None, aggregationDepth=2, \</span> |
| <span class="sd"> loss="squaredError", epsilon=1.35, maxBlockSizeInMB=0.0)</span> |
| <span class="sd"> Sets params for linear regression.</span> |
| <span class="sd"> """</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> |
| |
| <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegressionModel"</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">LinearRegressionModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="LinearRegression.setEpsilon"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression.setEpsilon">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setEpsilon</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`epsilon`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">epsilon</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="LinearRegression.setMaxIter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression.setMaxIter">[docs]</a> <span class="k">def</span> <span class="nf">setMaxIter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxIter`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="LinearRegression.setRegParam"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression.setRegParam">[docs]</a> <span class="k">def</span> <span class="nf">setRegParam</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`regParam`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">regParam</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="LinearRegression.setTol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression.setTol">[docs]</a> <span class="k">def</span> <span class="nf">setTol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`tol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">tol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="LinearRegression.setElasticNetParam"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression.setElasticNetParam">[docs]</a> <span class="k">def</span> <span class="nf">setElasticNetParam</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`elasticNetParam`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">elasticNetParam</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="LinearRegression.setFitIntercept"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression.setFitIntercept">[docs]</a> <span class="k">def</span> <span class="nf">setFitIntercept</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`fitIntercept`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">fitIntercept</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="LinearRegression.setStandardization"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression.setStandardization">[docs]</a> <span class="k">def</span> <span class="nf">setStandardization</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`standardization`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">standardization</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="LinearRegression.setWeightCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression.setWeightCol">[docs]</a> <span class="k">def</span> <span class="nf">setWeightCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`weightCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">weightCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="LinearRegression.setSolver"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression.setSolver">[docs]</a> <span class="k">def</span> <span class="nf">setSolver</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`solver`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">solver</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="LinearRegression.setAggregationDepth"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression.setAggregationDepth">[docs]</a> <span class="k">def</span> <span class="nf">setAggregationDepth</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`aggregationDepth`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">aggregationDepth</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="LinearRegression.setLoss"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression.setLoss">[docs]</a> <span class="k">def</span> <span class="nf">setLoss</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`loss`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">loss</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="LinearRegression.setMaxBlockSizeInMB"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegression.html#pyspark.ml.regression.LinearRegression.setMaxBlockSizeInMB">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMaxBlockSizeInMB</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxBlockSizeInMB`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxBlockSizeInMB</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> |
| |
| |
| <div class="viewcode-block" id="LinearRegressionModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegressionModel.html#pyspark.ml.regression.LinearRegressionModel">[docs]</a><span class="k">class</span> <span class="nc">LinearRegressionModel</span><span class="p">(</span> |
| <span class="n">_JavaRegressionModel</span><span class="p">,</span> |
| <span class="n">_LinearRegressionParams</span><span class="p">,</span> |
| <span class="n">GeneralJavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"LinearRegressionModel"</span><span class="p">],</span> |
| <span class="n">HasTrainingSummary</span><span class="p">[</span><span class="s2">"LinearRegressionSummary"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model fitted by :class:`LinearRegression`.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">coefficients</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model coefficients.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"coefficients"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">intercept</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model intercept.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"intercept"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">scale</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sa">r</span><span class="sd">"""</span> |
| <span class="sd"> The value by which :math:`\|y - X'w\|` is scaled down when loss is "huber", otherwise 1.0.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"scale"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">summary</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegressionTrainingSummary"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets summary (residuals, MSE, r-squared) of model on</span> |
| <span class="sd"> training set. A RuntimeError is raised if no training</span> |
| <span class="sd"> summary is available (`hasSummary` is false).</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">hasSummary</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">LinearRegressionTrainingSummary</span><span class="p">(</span><span class="nb">super</span><span class="p">(</span><span class="n">LinearRegressionModel</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">summary</span><span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span> |
| <span class="s2">"No training summary available for this </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span> |
| <span class="p">)</span> |
| |
| <div class="viewcode-block" id="LinearRegressionModel.evaluate"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegressionModel.html#pyspark.ml.regression.LinearRegressionModel.evaluate">[docs]</a> <span class="k">def</span> <span class="nf">evaluate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"LinearRegressionSummary"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Evaluates the model on a test dataset.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> dataset : :py:class:`pyspark.sql.DataFrame`</span> |
| <span class="sd"> Test dataset to evaluate model on, where dataset is an</span> |
| <span class="sd"> instance of :py:class:`pyspark.sql.DataFrame`</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">"dataset must be a DataFrame but got </span><span class="si">%s</span><span class="s2">."</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">dataset</span><span class="p">))</span> |
| <span class="n">java_lr_summary</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"evaluate"</span><span class="p">,</span> <span class="n">dataset</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">LinearRegressionSummary</span><span class="p">(</span><span class="n">java_lr_summary</span><span class="p">)</span></div></div> |
| |
| |
| <div class="viewcode-block" id="LinearRegressionSummary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegressionSummary.html#pyspark.ml.regression.LinearRegressionSummary">[docs]</a><span class="k">class</span> <span class="nc">LinearRegressionSummary</span><span class="p">(</span><span class="n">JavaWrapper</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Linear regression results evaluated on a dataset.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">predictions</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> DataFrame output by the model's `transform` method.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"predictions"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">predictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Field in "predictions" which gives the predicted value of</span> |
| <span class="sd"> the label at each instance.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"predictionCol"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">labelCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Field in "predictions" which gives the true label of each</span> |
| <span class="sd"> instance.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"labelCol"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">featuresCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Field in "predictions" which gives the features of each instance</span> |
| <span class="sd"> as a vector.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"featuresCol"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">explainedVariance</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sa">r</span><span class="sd">"""</span> |
| <span class="sd"> Returns the explained variance regression score.</span> |
| <span class="sd"> explainedVariance = :math:`1 - \frac{variance(y - \hat{y})}{variance(y)}`</span> |
| |
| <span class="sd"> Notes</span> |
| <span class="sd"> -----</span> |
| <span class="sd"> This ignores instance weights (setting all to 1.0) from</span> |
| <span class="sd"> `LinearRegression.weightCol`. This will change in later Spark</span> |
| <span class="sd"> versions.</span> |
| |
| <span class="sd"> For additional information see</span> |
| <span class="sd"> `Explained variation on Wikipedia \</span> |
| <span class="sd"> <https://en.wikipedia.org/wiki/Explained_variation>`_</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"explainedVariance"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">meanAbsoluteError</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns the mean absolute error, which is a risk function</span> |
| <span class="sd"> corresponding to the expected value of the absolute error</span> |
| <span class="sd"> loss or l1-norm loss.</span> |
| |
| <span class="sd"> Notes</span> |
| <span class="sd"> -----</span> |
| <span class="sd"> This ignores instance weights (setting all to 1.0) from</span> |
| <span class="sd"> `LinearRegression.weightCol`. This will change in later Spark</span> |
| <span class="sd"> versions.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"meanAbsoluteError"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">meanSquaredError</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns the mean squared error, which is a risk function</span> |
| <span class="sd"> corresponding to the expected value of the squared error</span> |
| <span class="sd"> loss or quadratic loss.</span> |
| |
| <span class="sd"> Notes</span> |
| <span class="sd"> -----</span> |
| <span class="sd"> This ignores instance weights (setting all to 1.0) from</span> |
| <span class="sd"> `LinearRegression.weightCol`. This will change in later Spark</span> |
| <span class="sd"> versions.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"meanSquaredError"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">rootMeanSquaredError</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns the root mean squared error, which is defined as the</span> |
| <span class="sd"> square root of the mean squared error.</span> |
| |
| <span class="sd"> Notes</span> |
| <span class="sd"> -----</span> |
| <span class="sd"> This ignores instance weights (setting all to 1.0) from</span> |
| <span class="sd"> `LinearRegression.weightCol`. This will change in later Spark</span> |
| <span class="sd"> versions.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"rootMeanSquaredError"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">r2</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns R^2, the coefficient of determination.</span> |
| |
| <span class="sd"> Notes</span> |
| <span class="sd"> -----</span> |
| <span class="sd"> This ignores instance weights (setting all to 1.0) from</span> |
| <span class="sd"> `LinearRegression.weightCol`. This will change in later Spark</span> |
| <span class="sd"> versions.</span> |
| |
| <span class="sd"> See also `Wikipedia coefficient of determination \</span> |
| <span class="sd"> <https://en.wikipedia.org/wiki/Coefficient_of_determination>`_</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"r2"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">r2adj</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns Adjusted R^2, the adjusted coefficient of determination.</span> |
| |
| <span class="sd"> Notes</span> |
| <span class="sd"> -----</span> |
| <span class="sd"> This ignores instance weights (setting all to 1.0) from</span> |
| <span class="sd"> `LinearRegression.weightCol`. This will change in later Spark versions.</span> |
| |
| <span class="sd"> `Wikipedia coefficient of determination, Adjusted R^2 \</span> |
| <span class="sd"> <https://en.wikipedia.org/wiki/Coefficient_of_determination#Adjusted_R2>`_</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"r2adj"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">residuals</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Residuals (label - predicted value)</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"residuals"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">numInstances</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Number of instances in DataFrame predictions</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"numInstances"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">degreesOfFreedom</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Degrees of freedom.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"degreesOfFreedom"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">devianceResiduals</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> The weighted residuals, the usual residuals rescaled by the</span> |
| <span class="sd"> square root of the instance weights.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"devianceResiduals"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">coefficientStandardErrors</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Standard error of estimated coefficients and intercept.</span> |
| <span class="sd"> This value is only available when using the "normal" solver.</span> |
| |
| <span class="sd"> If :py:attr:`LinearRegression.fitIntercept` is set to True,</span> |
| <span class="sd"> then the last element returned corresponds to the intercept.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> LinearRegression.solver</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"coefficientStandardErrors"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">tValues</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> T-statistic of estimated coefficients and intercept.</span> |
| <span class="sd"> This value is only available when using the "normal" solver.</span> |
| |
| <span class="sd"> If :py:attr:`LinearRegression.fitIntercept` is set to True,</span> |
| <span class="sd"> then the last element returned corresponds to the intercept.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> LinearRegression.solver</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"tValues"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">pValues</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Two-sided p-value of estimated coefficients and intercept.</span> |
| <span class="sd"> This value is only available when using the "normal" solver.</span> |
| |
| <span class="sd"> If :py:attr:`LinearRegression.fitIntercept` is set to True,</span> |
| <span class="sd"> then the last element returned corresponds to the intercept.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> LinearRegression.solver</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"pValues"</span><span class="p">)</span></div> |
| |
| |
| <div class="viewcode-block" id="LinearRegressionTrainingSummary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.LinearRegressionTrainingSummary.html#pyspark.ml.regression.LinearRegressionTrainingSummary">[docs]</a><span class="nd">@inherit_doc</span> |
| <span class="k">class</span> <span class="nc">LinearRegressionTrainingSummary</span><span class="p">(</span><span class="n">LinearRegressionSummary</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Linear regression training results. Currently, the training summary ignores the</span> |
| <span class="sd"> training weights except for the objective trace.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">objectiveHistory</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Objective function (scaled loss + regularization) at each</span> |
| <span class="sd"> iteration.</span> |
| <span class="sd"> This value is only available when using the "l-bfgs" solver.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> LinearRegression.solver</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"objectiveHistory"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">totalIterations</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Number of training iterations until termination.</span> |
| <span class="sd"> This value is only available when using the "l-bfgs" solver.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> LinearRegression.solver</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"totalIterations"</span><span class="p">)</span></div> |
| |
| |
| <span class="k">class</span> <span class="nc">_IsotonicRegressionParams</span><span class="p">(</span><span class="n">HasFeaturesCol</span><span class="p">,</span> <span class="n">HasLabelCol</span><span class="p">,</span> <span class="n">HasPredictionCol</span><span class="p">,</span> <span class="n">HasWeightCol</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Params for :py:class:`IsotonicRegression` and :py:class:`IsotonicRegressionModel`.</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">isotonic</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"isotonic"</span><span class="p">,</span> |
| <span class="s2">"whether the output sequence should be isotonic/increasing (true) or"</span> |
| <span class="o">+</span> <span class="s2">"antitonic/decreasing (false)."</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="n">featureIndex</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"featureIndex"</span><span class="p">,</span> |
| <span class="s2">"The index of the feature if featuresCol is a vector column, no effect otherwise."</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">_IsotonicRegressionParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="n">isotonic</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">featureIndex</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">getIsotonic</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of isotonic or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">isotonic</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="nf">getFeatureIndex</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of featureIndex or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">featureIndex</span><span class="p">)</span> |
| |
| |
| <div class="viewcode-block" id="IsotonicRegression"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.IsotonicRegression.html#pyspark.ml.regression.IsotonicRegression">[docs]</a><span class="nd">@inherit_doc</span> |
| <span class="k">class</span> <span class="nc">IsotonicRegression</span><span class="p">(</span> |
| <span class="n">JavaEstimator</span><span class="p">,</span> <span class="n">_IsotonicRegressionParams</span><span class="p">,</span> <span class="n">HasWeightCol</span><span class="p">,</span> <span class="n">JavaMLWritable</span><span class="p">,</span> <span class="n">JavaMLReadable</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Currently implemented using the parallelized pool adjacent violators algorithm.</span> |
| <span class="sd"> Only the univariate (single feature) algorithm is supported.</span> |
| |
| <span class="sd"> .. versionadded:: 1.6.0</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.ml.linalg import Vectors</span> |
| <span class="sd"> >>> df = spark.createDataFrame([</span> |
| <span class="sd"> ... (1.0, Vectors.dense(1.0)),</span> |
| <span class="sd"> ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"])</span> |
| <span class="sd"> >>> ir = IsotonicRegression()</span> |
| <span class="sd"> >>> model = ir.fit(df)</span> |
| <span class="sd"> >>> model.setFeaturesCol("features")</span> |
| <span class="sd"> IsotonicRegressionModel...</span> |
| <span class="sd"> >>> model.numFeatures</span> |
| <span class="sd"> 1</span> |
| <span class="sd"> >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])</span> |
| <span class="sd"> >>> model.transform(test0).head().prediction</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> model.predict(test0.head().features[model.getFeatureIndex()])</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> model.boundaries</span> |
| <span class="sd"> DenseVector([0.0, 1.0])</span> |
| <span class="sd"> >>> ir_path = temp_path + "/ir"</span> |
| <span class="sd"> >>> ir.save(ir_path)</span> |
| <span class="sd"> >>> ir2 = IsotonicRegression.load(ir_path)</span> |
| <span class="sd"> >>> ir2.getIsotonic()</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model_path = temp_path + "/ir_model"</span> |
| <span class="sd"> >>> model.save(model_path)</span> |
| <span class="sd"> >>> model2 = IsotonicRegressionModel.load(model_path)</span> |
| <span class="sd"> >>> model.boundaries == model2.boundaries</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.predictions == model2.predictions</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.transform(test0).take(1) == model2.transform(test0).take(1)</span> |
| <span class="sd"> True</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> |
| |
| <span class="nd">@keyword_only</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">isotonic</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">featureIndex</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> weightCol=None, isotonic=True, featureIndex=0):</span> |
| <span class="sd"> """</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">IsotonicRegression</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> |
| <span class="s2">"org.apache.spark.ml.regression.IsotonicRegression"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> |
| <span class="p">)</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="IsotonicRegression.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.IsotonicRegression.html#pyspark.ml.regression.IsotonicRegression.setParams">[docs]</a> <span class="nd">@keyword_only</span> |
| <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">isotonic</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">featureIndex</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"IsotonicRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> weightCol=None, isotonic=True, featureIndex=0):</span> |
| <span class="sd"> Set the params for IsotonicRegression.</span> |
| <span class="sd"> """</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> |
| |
| <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IsotonicRegressionModel"</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">IsotonicRegressionModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="IsotonicRegression.setIsotonic"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.IsotonicRegression.html#pyspark.ml.regression.IsotonicRegression.setIsotonic">[docs]</a> <span class="k">def</span> <span class="nf">setIsotonic</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IsotonicRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`isotonic`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">isotonic</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="IsotonicRegression.setFeatureIndex"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.IsotonicRegression.html#pyspark.ml.regression.IsotonicRegression.setFeatureIndex">[docs]</a> <span class="k">def</span> <span class="nf">setFeatureIndex</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IsotonicRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`featureIndex`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featureIndex</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="IsotonicRegression.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.IsotonicRegression.html#pyspark.ml.regression.IsotonicRegression.setFeaturesCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IsotonicRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`featuresCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="IsotonicRegression.setPredictionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.IsotonicRegression.html#pyspark.ml.regression.IsotonicRegression.setPredictionCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setPredictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IsotonicRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`predictionCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">predictionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="IsotonicRegression.setLabelCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.IsotonicRegression.html#pyspark.ml.regression.IsotonicRegression.setLabelCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setLabelCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IsotonicRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`labelCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">labelCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="IsotonicRegression.setWeightCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.IsotonicRegression.html#pyspark.ml.regression.IsotonicRegression.setWeightCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setWeightCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IsotonicRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`weightCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">weightCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> |
| |
| |
| <div class="viewcode-block" id="IsotonicRegressionModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.IsotonicRegressionModel.html#pyspark.ml.regression.IsotonicRegressionModel">[docs]</a><span class="k">class</span> <span class="nc">IsotonicRegressionModel</span><span class="p">(</span> |
| <span class="n">JavaModel</span><span class="p">,</span> |
| <span class="n">_IsotonicRegressionParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"IsotonicRegressionModel"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model fitted by :class:`IsotonicRegression`.</span> |
| |
| <span class="sd"> .. versionadded:: 1.6.0</span> |
| <span class="sd"> """</span> |
| |
| <div class="viewcode-block" id="IsotonicRegressionModel.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.IsotonicRegressionModel.html#pyspark.ml.regression.IsotonicRegressionModel.setFeaturesCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IsotonicRegressionModel"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`featuresCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="IsotonicRegressionModel.setPredictionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.IsotonicRegressionModel.html#pyspark.ml.regression.IsotonicRegressionModel.setPredictionCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setPredictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IsotonicRegressionModel"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`predictionCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">predictionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="IsotonicRegressionModel.setFeatureIndex"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.IsotonicRegressionModel.html#pyspark.ml.regression.IsotonicRegressionModel.setFeatureIndex">[docs]</a> <span class="k">def</span> <span class="nf">setFeatureIndex</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"IsotonicRegressionModel"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`featureIndex`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featureIndex</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">boundaries</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Boundaries in increasing order for which predictions are known.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"boundaries"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">predictions</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Predictions associated with the boundaries at the same index, monotone because of isotonic</span> |
| <span class="sd"> regression.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"predictions"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">numFeatures</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Returns the number of features the model was trained on. If unknown, returns -1.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"numFeatures"</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="IsotonicRegressionModel.predict"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.IsotonicRegressionModel.html#pyspark.ml.regression.IsotonicRegressionModel.predict">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Predict label for the given feature value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"predict"</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span></div></div> |
| |
| |
| <span class="k">class</span> <span class="nc">_DecisionTreeRegressorParams</span><span class="p">(</span><span class="n">_DecisionTreeParams</span><span class="p">,</span> <span class="n">_TreeRegressorParams</span><span class="p">,</span> <span class="n">HasVarianceCol</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Params for :py:class:`DecisionTreeRegressor` and :py:class:`DecisionTreeRegressionModel`.</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">_DecisionTreeRegressorParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> |
| <span class="n">maxDepth</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> |
| <span class="n">maxBins</span><span class="o">=</span><span class="mi">32</span><span class="p">,</span> |
| <span class="n">minInstancesPerNode</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> |
| <span class="n">minInfoGain</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">maxMemoryInMB</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span> |
| <span class="n">cacheNodeIds</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> |
| <span class="n">checkpointInterval</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> |
| <span class="n">impurity</span><span class="o">=</span><span class="s2">"variance"</span><span class="p">,</span> |
| <span class="n">leafCol</span><span class="o">=</span><span class="s2">""</span><span class="p">,</span> |
| <span class="n">minWeightFractionPerNode</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| |
| <div class="viewcode-block" id="DecisionTreeRegressor"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor">[docs]</a><span class="nd">@inherit_doc</span> |
| <span class="k">class</span> <span class="nc">DecisionTreeRegressor</span><span class="p">(</span> |
| <span class="n">_JavaRegressor</span><span class="p">[</span><span class="s2">"DecisionTreeRegressionModel"</span><span class="p">],</span> |
| <span class="n">_DecisionTreeRegressorParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"DecisionTreeRegressor"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> `Decision tree &lt;https://en.wikipedia.org/wiki/Decision_tree_learning&gt;`_</span> |
| <span class="sd"> learning algorithm for regression.</span> |
| <span class="sd"> It supports both continuous and categorical features.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.ml.linalg import Vectors</span> |
| <span class="sd"> >>> df = spark.createDataFrame([</span> |
| <span class="sd"> ... (1.0, Vectors.dense(1.0)),</span> |
| <span class="sd"> ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"])</span> |
| <span class="sd"> >>> dt = DecisionTreeRegressor(maxDepth=2)</span> |
| <span class="sd"> >>> dt.setVarianceCol("variance")</span> |
| <span class="sd"> DecisionTreeRegressor...</span> |
| <span class="sd"> >>> model = dt.fit(df)</span> |
| <span class="sd"> >>> model.getVarianceCol()</span> |
| <span class="sd"> 'variance'</span> |
| <span class="sd"> >>> model.setLeafCol("leafId")</span> |
| <span class="sd"> DecisionTreeRegressionModel...</span> |
| <span class="sd"> >>> model.depth</span> |
| <span class="sd"> 1</span> |
| <span class="sd"> >>> model.numNodes</span> |
| <span class="sd"> 3</span> |
| <span class="sd"> >>> model.featureImportances</span> |
| <span class="sd"> SparseVector(1, {0: 1.0})</span> |
| <span class="sd"> >>> model.numFeatures</span> |
| <span class="sd"> 1</span> |
| <span class="sd"> >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])</span> |
| <span class="sd"> >>> model.predict(test0.head().features)</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> result = model.transform(test0).head()</span> |
| <span class="sd"> >>> result.prediction</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> model.predictLeaf(test0.head().features)</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> result.leafId</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])</span> |
| <span class="sd"> >>> model.transform(test1).head().prediction</span> |
| <span class="sd"> 1.0</span> |
| <span class="sd"> >>> dtr_path = temp_path + "/dtr"</span> |
| <span class="sd"> >>> dt.save(dtr_path)</span> |
| <span class="sd"> >>> dt2 = DecisionTreeRegressor.load(dtr_path)</span> |
| <span class="sd"> >>> dt2.getMaxDepth()</span> |
| <span class="sd"> 2</span> |
| <span class="sd"> >>> model_path = temp_path + "/dtr_model"</span> |
| <span class="sd"> >>> model.save(model_path)</span> |
| <span class="sd"> >>> model2 = DecisionTreeRegressionModel.load(model_path)</span> |
| <span class="sd"> >>> model.numNodes == model2.numNodes</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.depth == model2.depth</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.transform(test1).head().variance</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> model.transform(test0).take(1) == model2.transform(test0).take(1)</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> df3 = spark.createDataFrame([</span> |
| <span class="sd"> ... (1.0, 0.2, Vectors.dense(1.0)),</span> |
| <span class="sd"> ... (1.0, 0.8, Vectors.dense(1.0)),</span> |
| <span class="sd"> ... (0.0, 1.0, Vectors.sparse(1, [], []))], ["label", "weight", "features"])</span> |
| <span class="sd"> >>> dt3 = DecisionTreeRegressor(maxDepth=2, weightCol="weight", varianceCol="variance")</span> |
| <span class="sd"> >>> model3 = dt3.fit(df3)</span> |
| <span class="sd"> >>> print(model3.toDebugString)</span> |
| <span class="sd"> DecisionTreeRegressionModel...depth=1, numNodes=3...</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.__init__: a keyword-only constructor that assigns self._java_obj from _new_java_obj("org.apache.spark.ml.regression.DecisionTreeRegressor", self.uid) and then passes self._input_kwargs to setParams. --><span class="nd">@keyword_only</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">maxDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> |
| <span class="n">maxBins</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">32</span><span class="p">,</span> |
| <span class="n">minInstancesPerNode</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> |
| <span class="n">minInfoGain</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">maxMemoryInMB</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">256</span><span class="p">,</span> |
| <span class="n">cacheNodeIds</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="n">checkpointInterval</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> |
| <span class="n">impurity</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"variance"</span><span class="p">,</span> |
| <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">varianceCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">leafCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span><span class="p">,</span> |
| <span class="n">minWeightFractionPerNode</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \</span> |
| <span class="sd"> maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \</span> |
| <span class="sd"> impurity="variance", seed=None, varianceCol=None, weightCol=None, \</span> |
| <span class="sd"> leafCol="", minWeightFractionPerNode=0.0)</span> |
| <span class="sd"> """</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">DecisionTreeRegressor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> |
| <span class="s2">"org.apache.spark.ml.regression.DecisionTreeRegressor"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> |
| <span class="p">)</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.setParams; the [docs] anchor links back to this method in the API reference page. The listed body reads self._input_kwargs and returns self._set(**kwargs). --><div class="viewcode-block" id="DecisionTreeRegressor.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor.setParams">[docs]</a> <span class="nd">@keyword_only</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">maxDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> |
| <span class="n">maxBins</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">32</span><span class="p">,</span> |
| <span class="n">minInstancesPerNode</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> |
| <span class="n">minInfoGain</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">maxMemoryInMB</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">256</span><span class="p">,</span> |
| <span class="n">cacheNodeIds</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="n">checkpointInterval</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> |
| <span class="n">impurity</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"variance"</span><span class="p">,</span> |
| <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">varianceCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">leafCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span><span class="p">,</span> |
| <span class="n">minWeightFractionPerNode</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \</span> |
| <span class="sd"> maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \</span> |
| <span class="sd"> impurity="variance", seed=None, varianceCol=None, weightCol=None, \</span> |
| <span class="sd"> leafCol="", minWeightFractionPerNode=0.0)</span> |
| <span class="sd"> Sets params for the DecisionTreeRegressor.</span> |
| <span class="sd"> """</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> |
| |
| <!-- Highlighted listing of _create_model: the listed body wraps the given java_model in a DecisionTreeRegressionModel and returns it. --><span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressionModel"</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">DecisionTreeRegressionModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.setMaxDepth; the [docs] anchor links back to this method in the API reference page. The listed body returns self._set(maxDepth=value). --><div class="viewcode-block" id="DecisionTreeRegressor.setMaxDepth"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor.setMaxDepth">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMaxDepth</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxDepth`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxDepth</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.setMaxBins; the [docs] anchor links back to this method in the API reference page. The listed body returns self._set(maxBins=value). --><div class="viewcode-block" id="DecisionTreeRegressor.setMaxBins"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor.setMaxBins">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMaxBins</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxBins`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxBins</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.setMinInstancesPerNode; the [docs] anchor links back to this method in the API reference page. The listed body returns self._set(minInstancesPerNode=value). --><div class="viewcode-block" id="DecisionTreeRegressor.setMinInstancesPerNode"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor.setMinInstancesPerNode">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMinInstancesPerNode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`minInstancesPerNode`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minInstancesPerNode</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.setMinWeightFractionPerNode; the [docs] anchor links back to this method in the API reference page. The listed body returns self._set(minWeightFractionPerNode=value). --><div class="viewcode-block" id="DecisionTreeRegressor.setMinWeightFractionPerNode"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor.setMinWeightFractionPerNode">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMinWeightFractionPerNode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`minWeightFractionPerNode`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minWeightFractionPerNode</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.setMinInfoGain; the [docs] anchor links back to this method in the API reference page. The listed body returns self._set(minInfoGain=value). --><div class="viewcode-block" id="DecisionTreeRegressor.setMinInfoGain"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor.setMinInfoGain">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMinInfoGain</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`minInfoGain`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minInfoGain</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.setMaxMemoryInMB; the [docs] anchor links back to this method in the API reference page. The listed body returns self._set(maxMemoryInMB=value). --><div class="viewcode-block" id="DecisionTreeRegressor.setMaxMemoryInMB"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor.setMaxMemoryInMB">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMaxMemoryInMB</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxMemoryInMB`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxMemoryInMB</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.setCacheNodeIds; the [docs] anchor links back to this method in the API reference page. The listed body returns self._set(cacheNodeIds=value). --><div class="viewcode-block" id="DecisionTreeRegressor.setCacheNodeIds"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor.setCacheNodeIds">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setCacheNodeIds</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`cacheNodeIds`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">cacheNodeIds</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.setImpurity; the [docs] anchor links back to this method in the API reference page. The listed body returns self._set(impurity=value). --><div class="viewcode-block" id="DecisionTreeRegressor.setImpurity"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor.setImpurity">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setImpurity</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`impurity`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">impurity</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.setCheckpointInterval; the [docs] anchor links back to this method in the API reference page. The listed body returns self._set(checkpointInterval=value). --><div class="viewcode-block" id="DecisionTreeRegressor.setCheckpointInterval"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor.setCheckpointInterval">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setCheckpointInterval</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`checkpointInterval`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">checkpointInterval</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.setSeed; the [docs] anchor links back to this method in the API reference page. The listed body returns self._set(seed=value); unlike the neighbouring setters, the listing shows no @since decorator here. --><div class="viewcode-block" id="DecisionTreeRegressor.setSeed"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor.setSeed">[docs]</a> <span class="k">def</span> <span class="nf">setSeed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`seed`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.setWeightCol; the [docs] anchor links back to this method in the API reference page. The listed body returns self._set(weightCol=value). --><div class="viewcode-block" id="DecisionTreeRegressor.setWeightCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor.setWeightCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setWeightCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`weightCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">weightCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted listing of DecisionTreeRegressor.setVarianceCol; the [docs] anchor links back to this method in the API reference page. The listed body returns self._set(varianceCol=value). The second closing div tag on the last row ends an enclosing element opened earlier in the page. --><div class="viewcode-block" id="DecisionTreeRegressor.setVarianceCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressor.html#pyspark.ml.regression.DecisionTreeRegressor.setVarianceCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setVarianceCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`varianceCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">varianceCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> |
| |
| |
| <!-- Highlighted listing of class DecisionTreeRegressionModel; the [docs] anchors link back to the class and to its setVarianceCol method in the API reference pages. The listing shows setVarianceCol, which returns self._set(varianceCol=value), and the featureImportances property, which returns self._call_java("featureImportances"). --><div class="viewcode-block" id="DecisionTreeRegressionModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressionModel.html#pyspark.ml.regression.DecisionTreeRegressionModel">[docs]</a><span class="nd">@inherit_doc</span> |
| <span class="k">class</span> <span class="nc">DecisionTreeRegressionModel</span><span class="p">(</span> |
| <span class="n">_JavaRegressionModel</span><span class="p">,</span> |
| <span class="n">_DecisionTreeModel</span><span class="p">,</span> |
| <span class="n">_DecisionTreeRegressorParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"DecisionTreeRegressionModel"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model fitted by :class:`DecisionTreeRegressor`.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| <span class="sd"> """</span> |
| |
| <div class="viewcode-block" id="DecisionTreeRegressionModel.setVarianceCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.DecisionTreeRegressionModel.html#pyspark.ml.regression.DecisionTreeRegressionModel.setVarianceCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setVarianceCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"DecisionTreeRegressionModel"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`varianceCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">varianceCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">featureImportances</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Estimate of the importance of each feature.</span> |
| |
| <span class="sd"> This generalizes the idea of "Gini" importance to other losses,</span> |
| <span class="sd"> following the explanation of Gini importance from "Random Forests" documentation</span> |
| <span class="sd"> by Leo Breiman and Adele Cutler, and following the implementation from scikit-learn.</span> |
| |
| <span class="sd"> This feature importance is calculated as follows:</span> |
| <span class="sd"> - importance(feature j) = sum (over nodes which split on feature j) of the gain,</span> |
| <span class="sd"> where gain is scaled by the number of instances passing through node</span> |
| <span class="sd"> - Normalize importances for tree to sum to 1.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| |
| <span class="sd"> Notes</span> |
| <span class="sd"> -----</span> |
| <span class="sd"> Feature importance for single decision trees can have high variance due to</span> |
| <span class="sd"> correlated predictor variables. Consider using a :py:class:`RandomForestRegressor`</span> |
| <span class="sd"> to determine feature importance instead.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"featureImportances"</span><span class="p">)</span></div> |
| |
| |
| <!-- Highlighted listing of class _RandomForestRegressorParams: the listed __init__ forwards *args to the parent initializer and then registers defaults through self._setDefault (maxDepth=5, maxBins=32, numTrees=20, bootstrap=True, among others). This listing has no [docs] anchor or viewcode-block wrapper. --><span class="k">class</span> <span class="nc">_RandomForestRegressorParams</span><span class="p">(</span><span class="n">_RandomForestParams</span><span class="p">,</span> <span class="n">_TreeRegressorParams</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Params for :py:class:`RandomForestRegressor` and :py:class:`RandomForestRegressionModel`.</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">_RandomForestRegressorParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> |
| <span class="n">maxDepth</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> |
| <span class="n">maxBins</span><span class="o">=</span><span class="mi">32</span><span class="p">,</span> |
| <span class="n">minInstancesPerNode</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> |
| <span class="n">minInfoGain</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">maxMemoryInMB</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span> |
| <span class="n">cacheNodeIds</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> |
| <span class="n">checkpointInterval</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> |
| <span class="n">impurity</span><span class="o">=</span><span class="s2">"variance"</span><span class="p">,</span> |
| <span class="n">subsamplingRate</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span> |
| <span class="n">numTrees</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> |
| <span class="n">featureSubsetStrategy</span><span class="o">=</span><span class="s2">"auto"</span><span class="p">,</span> |
| <span class="n">leafCol</span><span class="o">=</span><span class="s2">""</span><span class="p">,</span> |
| <span class="n">minWeightFractionPerNode</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">bootstrap</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| |
| <div class="viewcode-block" id="RandomForestRegressor"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor">[docs]</a><span class="nd">@inherit_doc</span> |
| <span class="k">class</span> <span class="nc">RandomForestRegressor</span><span class="p">(</span> |
| <span class="n">_JavaRegressor</span><span class="p">[</span><span class="s2">"RandomForestRegressionModel"</span><span class="p">],</span> |
| <span class="n">_RandomForestRegressorParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"RandomForestRegressor"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> `Random Forest <http://en.wikipedia.org/wiki/Random_forest>`_</span> |
| <span class="sd"> learning algorithm for regression.</span> |
| <span class="sd"> It supports both continuous and categorical features.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from numpy import allclose</span> |
| <span class="sd"> >>> from pyspark.ml.linalg import Vectors</span> |
| <span class="sd"> >>> df = spark.createDataFrame([</span> |
| <span class="sd"> ... (1.0, Vectors.dense(1.0)),</span> |
| <span class="sd"> ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"])</span> |
| <span class="sd"> >>> rf = RandomForestRegressor(numTrees=2, maxDepth=2)</span> |
| <span class="sd"> >>> rf.getMinWeightFractionPerNode()</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> rf.setSeed(42)</span> |
| <span class="sd"> RandomForestRegressor...</span> |
| <span class="sd"> >>> model = rf.fit(df)</span> |
| <span class="sd"> >>> model.getBootstrap()</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.getSeed()</span> |
| <span class="sd"> 42</span> |
| <span class="sd"> >>> model.setLeafCol("leafId")</span> |
| <span class="sd"> RandomForestRegressionModel...</span> |
| <span class="sd"> >>> model.featureImportances</span> |
| <span class="sd"> SparseVector(1, {0: 1.0})</span> |
| <span class="sd"> >>> allclose(model.treeWeights, [1.0, 1.0])</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])</span> |
| <span class="sd"> >>> model.predict(test0.head().features)</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> model.predictLeaf(test0.head().features)</span> |
| <span class="sd"> DenseVector([0.0, 0.0])</span> |
| <span class="sd"> >>> result = model.transform(test0).head()</span> |
| <span class="sd"> >>> result.prediction</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> result.leafId</span> |
| <span class="sd"> DenseVector([0.0, 0.0])</span> |
| <span class="sd"> >>> model.numFeatures</span> |
| <span class="sd"> 1</span> |
| <span class="sd"> >>> model.trees</span> |
| <span class="sd"> [DecisionTreeRegressionModel...depth=..., DecisionTreeRegressionModel...]</span> |
| <span class="sd"> >>> model.getNumTrees</span> |
| <span class="sd"> 2</span> |
| <span class="sd"> >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])</span> |
| <span class="sd"> >>> model.transform(test1).head().prediction</span> |
| <span class="sd"> 0.5</span> |
| <span class="sd"> >>> rfr_path = temp_path + "/rfr"</span> |
| <span class="sd"> >>> rf.save(rfr_path)</span> |
| <span class="sd"> >>> rf2 = RandomForestRegressor.load(rfr_path)</span> |
| <span class="sd"> >>> rf2.getNumTrees()</span> |
| <span class="sd"> 2</span> |
| <span class="sd"> >>> model_path = temp_path + "/rfr_model"</span> |
| <span class="sd"> >>> model.save(model_path)</span> |
| <span class="sd"> >>> model2 = RandomForestRegressionModel.load(model_path)</span> |
| <span class="sd"> >>> model.featureImportances == model2.featureImportances</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.transform(test0).take(1) == model2.transform(test0).take(1)</span> |
| <span class="sd"> True</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> |
| |
| <span class="nd">@keyword_only</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">maxDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> |
| <span class="n">maxBins</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">32</span><span class="p">,</span> |
| <span class="n">minInstancesPerNode</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> |
| <span class="n">minInfoGain</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">maxMemoryInMB</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">256</span><span class="p">,</span> |
| <span class="n">cacheNodeIds</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="n">checkpointInterval</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> |
| <span class="n">impurity</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"variance"</span><span class="p">,</span> |
| <span class="n">subsamplingRate</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> |
| <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">numTrees</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> |
| <span class="n">featureSubsetStrategy</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"auto"</span><span class="p">,</span> |
| <span class="n">leafCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span><span class="p">,</span> |
| <span class="n">minWeightFractionPerNode</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">bootstrap</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \</span> |
| <span class="sd"> maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \</span> |
| <span class="sd"> impurity="variance", subsamplingRate=1.0, seed=None, numTrees=20, \</span> |
| <span class="sd"> featureSubsetStrategy="auto", leafCol="", minWeightFractionPerNode=0.0, \</span> |
| <span class="sd"> weightCol=None, bootstrap=True)</span> |
| <span class="sd"> """</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">RandomForestRegressor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> |
| <span class="s2">"org.apache.spark.ml.regression.RandomForestRegressor"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> |
| <span class="p">)</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setParams">[docs]</a> <span class="nd">@keyword_only</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">maxDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> |
| <span class="n">maxBins</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">32</span><span class="p">,</span> |
| <span class="n">minInstancesPerNode</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> |
| <span class="n">minInfoGain</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">maxMemoryInMB</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">256</span><span class="p">,</span> |
| <span class="n">cacheNodeIds</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="n">checkpointInterval</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> |
| <span class="n">impurity</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"variance"</span><span class="p">,</span> |
| <span class="n">subsamplingRate</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> |
| <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">numTrees</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> |
| <span class="n">featureSubsetStrategy</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"auto"</span><span class="p">,</span> |
| <span class="n">leafCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span><span class="p">,</span> |
| <span class="n">minWeightFractionPerNode</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">bootstrap</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \</span> |
| <span class="sd"> maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \</span> |
| <span class="sd"> impurity="variance", subsamplingRate=1.0, seed=None, numTrees=20, \</span> |
| <span class="sd"> featureSubsetStrategy="auto", leafCol="", minWeightFractionPerNode=0.0, \</span> |
| <span class="sd"> weightCol=None, bootstrap=True)</span> |
| <span class="sd"> Sets params for random forest regression.</span> |
| <span class="sd"> """</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> |
| |
| <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressionModel"</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">RandomForestRegressionModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setMaxDepth"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setMaxDepth">[docs]</a> <span class="k">def</span> <span class="nf">setMaxDepth</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxDepth`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxDepth</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setMaxBins"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setMaxBins">[docs]</a> <span class="k">def</span> <span class="nf">setMaxBins</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxBins`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxBins</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setMinInstancesPerNode"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setMinInstancesPerNode">[docs]</a> <span class="k">def</span> <span class="nf">setMinInstancesPerNode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`minInstancesPerNode`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minInstancesPerNode</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setMinInfoGain"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setMinInfoGain">[docs]</a> <span class="k">def</span> <span class="nf">setMinInfoGain</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`minInfoGain`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minInfoGain</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setMaxMemoryInMB"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setMaxMemoryInMB">[docs]</a> <span class="k">def</span> <span class="nf">setMaxMemoryInMB</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxMemoryInMB`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxMemoryInMB</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setCacheNodeIds"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setCacheNodeIds">[docs]</a> <span class="k">def</span> <span class="nf">setCacheNodeIds</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`cacheNodeIds`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">cacheNodeIds</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setImpurity"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setImpurity">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setImpurity</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`impurity`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">impurity</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setNumTrees"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setNumTrees">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setNumTrees</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`numTrees`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">numTrees</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setBootstrap"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setBootstrap">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setBootstrap</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`bootstrap`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">bootstrap</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setSubsamplingRate"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setSubsamplingRate">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setSubsamplingRate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`subsamplingRate`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">subsamplingRate</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setFeatureSubsetStrategy"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setFeatureSubsetStrategy">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setFeatureSubsetStrategy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`featureSubsetStrategy`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featureSubsetStrategy</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setCheckpointInterval"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setCheckpointInterval">[docs]</a> <span class="k">def</span> <span class="nf">setCheckpointInterval</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`checkpointInterval`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">checkpointInterval</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setSeed"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setSeed">[docs]</a> <span class="k">def</span> <span class="nf">setSeed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`seed`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setWeightCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setWeightCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setWeightCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`weightCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">weightCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="RandomForestRegressor.setMinWeightFractionPerNode"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressor.html#pyspark.ml.regression.RandomForestRegressor.setMinWeightFractionPerNode">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMinWeightFractionPerNode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"RandomForestRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`minWeightFractionPerNode`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minWeightFractionPerNode</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> |
| |
| |
| <div class="viewcode-block" id="RandomForestRegressionModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.RandomForestRegressionModel.html#pyspark.ml.regression.RandomForestRegressionModel">[docs]</a><span class="k">class</span> <span class="nc">RandomForestRegressionModel</span><span class="p">(</span> |
| <span class="n">_JavaRegressionModel</span><span class="p">[</span><span class="n">Vector</span><span class="p">],</span> |
| <span class="n">_TreeEnsembleModel</span><span class="p">,</span> |
| <span class="n">_RandomForestRegressorParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"RandomForestRegressionModel"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model fitted by :class:`RandomForestRegressor`.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">trees</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">DecisionTreeRegressionModel</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""Trees in this ensemble. Warning: These have null parent Estimators."""</span> |
| <span class="k">return</span> <span class="p">[</span><span class="n">DecisionTreeRegressionModel</span><span class="p">(</span><span class="n">m</span><span class="p">)</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"trees"</span><span class="p">))]</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">featureImportances</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Estimate of the importance of each feature.</span> |
| |
| <span class="sd"> Each feature's importance is the average of its importance across all trees in the ensemble.</span> |
| <span class="sd"> The importance vector is normalized to sum to 1. This method is suggested by Hastie et al.</span> |
| <span class="sd"> (Hastie, Tibshirani, Friedman. "The Elements of Statistical Learning, 2nd Edition." 2001.)</span> |
| <span class="sd"> and follows the implementation from scikit-learn.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> DecisionTreeRegressionModel.featureImportances</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"featureImportances"</span><span class="p">)</span></div> |
| |
| |
| <span class="k">class</span> <span class="nc">_GBTRegressorParams</span><span class="p">(</span><span class="n">_GBTParams</span><span class="p">,</span> <span class="n">_TreeRegressorParams</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Params for :py:class:`GBTRegressor` and :py:class:`GBTRegressorModel`.</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">supportedLossTypes</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"squared"</span><span class="p">,</span> <span class="s2">"absolute"</span><span class="p">]</span> |
| |
| <span class="n">lossType</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"lossType"</span><span class="p">,</span> |
| <span class="s2">"Loss function which GBT tries to minimize (case-insensitive). "</span> |
| <span class="o">+</span> <span class="s2">"Supported options: "</span> |
| <span class="o">+</span> <span class="s2">", "</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">supportedLossTypes</span><span class="p">),</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">_GBTRegressorParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> |
| <span class="n">maxDepth</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> |
| <span class="n">maxBins</span><span class="o">=</span><span class="mi">32</span><span class="p">,</span> |
| <span class="n">minInstancesPerNode</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> |
| <span class="n">minInfoGain</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">maxMemoryInMB</span><span class="o">=</span><span class="mi">256</span><span class="p">,</span> |
| <span class="n">cacheNodeIds</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> |
| <span class="n">subsamplingRate</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span> |
| <span class="n">checkpointInterval</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> |
| <span class="n">lossType</span><span class="o">=</span><span class="s2">"squared"</span><span class="p">,</span> |
| <span class="n">maxIter</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> |
| <span class="n">stepSize</span><span class="o">=</span><span class="mf">0.1</span><span class="p">,</span> |
| <span class="n">impurity</span><span class="o">=</span><span class="s2">"variance"</span><span class="p">,</span> |
| <span class="n">featureSubsetStrategy</span><span class="o">=</span><span class="s2">"all"</span><span class="p">,</span> |
| <span class="n">validationTol</span><span class="o">=</span><span class="mf">0.01</span><span class="p">,</span> |
| <span class="n">leafCol</span><span class="o">=</span><span class="s2">""</span><span class="p">,</span> |
| <span class="n">minWeightFractionPerNode</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getLossType</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of lossType or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">lossType</span><span class="p">)</span> |
| |
| |
| <div class="viewcode-block" id="GBTRegressor"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor">[docs]</a><span class="nd">@inherit_doc</span> |
| <span class="k">class</span> <span class="nc">GBTRegressor</span><span class="p">(</span> |
| <span class="n">_JavaRegressor</span><span class="p">[</span><span class="s2">"GBTRegressionModel"</span><span class="p">],</span> |
| <span class="n">_GBTRegressorParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"GBTRegressor"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> `Gradient-Boosted Trees (GBTs) &lt;https://en.wikipedia.org/wiki/Gradient_boosting&gt;`_</span> |
| <span class="sd"> learning algorithm for regression.</span> |
| <span class="sd"> It supports both continuous and categorical features.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from numpy import allclose</span> |
| <span class="sd"> >>> from pyspark.ml.linalg import Vectors</span> |
| <span class="sd"> >>> df = spark.createDataFrame([</span> |
| <span class="sd"> ... (1.0, Vectors.dense(1.0)),</span> |
| <span class="sd"> ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"])</span> |
| <span class="sd"> >>> gbt = GBTRegressor(maxDepth=2, seed=42, leafCol="leafId")</span> |
| <span class="sd"> >>> gbt.setMaxIter(5)</span> |
| <span class="sd"> GBTRegressor...</span> |
| <span class="sd"> >>> gbt.setMinWeightFractionPerNode(0.049)</span> |
| <span class="sd"> GBTRegressor...</span> |
| <span class="sd"> >>> gbt.getMaxIter()</span> |
| <span class="sd"> 5</span> |
| <span class="sd"> >>> print(gbt.getImpurity())</span> |
| <span class="sd"> variance</span> |
| <span class="sd"> >>> print(gbt.getFeatureSubsetStrategy())</span> |
| <span class="sd"> all</span> |
| <span class="sd"> >>> model = gbt.fit(df)</span> |
| <span class="sd"> >>> model.featureImportances</span> |
| <span class="sd"> SparseVector(1, {0: 1.0})</span> |
| <span class="sd"> >>> model.numFeatures</span> |
| <span class="sd"> 1</span> |
| <span class="sd"> >>> allclose(model.treeWeights, [1.0, 0.1, 0.1, 0.1, 0.1])</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])</span> |
| <span class="sd"> >>> model.predict(test0.head().features)</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> model.predictLeaf(test0.head().features)</span> |
| <span class="sd"> DenseVector([0.0, 0.0, 0.0, 0.0, 0.0])</span> |
| <span class="sd"> >>> result = model.transform(test0).head()</span> |
| <span class="sd"> >>> result.prediction</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> result.leafId</span> |
| <span class="sd"> DenseVector([0.0, 0.0, 0.0, 0.0, 0.0])</span> |
| <span class="sd"> >>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])</span> |
| <span class="sd"> >>> model.transform(test1).head().prediction</span> |
| <span class="sd"> 1.0</span> |
| <span class="sd"> >>> gbtr_path = temp_path + "gbtr"</span> |
| <span class="sd"> >>> gbt.save(gbtr_path)</span> |
| <span class="sd"> >>> gbt2 = GBTRegressor.load(gbtr_path)</span> |
| <span class="sd"> >>> gbt2.getMaxDepth()</span> |
| <span class="sd"> 2</span> |
| <span class="sd"> >>> model_path = temp_path + "gbtr_model"</span> |
| <span class="sd"> >>> model.save(model_path)</span> |
| <span class="sd"> >>> model2 = GBTRegressionModel.load(model_path)</span> |
| <span class="sd"> >>> model.featureImportances == model2.featureImportances</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.treeWeights == model2.treeWeights</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.transform(test0).take(1) == model2.transform(test0).take(1)</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.trees</span> |
| <span class="sd"> [DecisionTreeRegressionModel...depth=..., DecisionTreeRegressionModel...]</span> |
| <span class="sd"> >>> validation = spark.createDataFrame([(0.0, Vectors.dense(-1.0))],</span> |
| <span class="sd"> ... ["label", "features"])</span> |
| <span class="sd"> >>> model.evaluateEachIteration(validation, "squared")</span> |
| <span class="sd"> [0.0, 0.0, 0.0, 0.0, 0.0]</span> |
| <span class="sd"> >>> gbt = gbt.setValidationIndicatorCol("validationIndicator")</span> |
| <span class="sd"> >>> gbt.getValidationIndicatorCol()</span> |
| <span class="sd"> 'validationIndicator'</span> |
| <span class="sd"> >>> gbt.getValidationTol()</span> |
| <span class="sd"> 0.01</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> |
| |
| <span class="nd">@keyword_only</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">maxDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> |
| <span class="n">maxBins</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">32</span><span class="p">,</span> |
| <span class="n">minInstancesPerNode</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> |
| <span class="n">minInfoGain</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">maxMemoryInMB</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">256</span><span class="p">,</span> |
| <span class="n">cacheNodeIds</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="n">subsamplingRate</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> |
| <span class="n">checkpointInterval</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> |
| <span class="n">lossType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"squared"</span><span class="p">,</span> |
| <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> |
| <span class="n">stepSize</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.1</span><span class="p">,</span> |
| <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">impurity</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"variance"</span><span class="p">,</span> |
| <span class="n">featureSubsetStrategy</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"all"</span><span class="p">,</span> |
| <span class="n">validationTol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.01</span><span class="p">,</span> |
| <span class="n">validationIndicatorCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">leafCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span><span class="p">,</span> |
| <span class="n">minWeightFractionPerNode</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \</span> |
| <span class="sd"> maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \</span> |
| <span class="sd"> checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, \</span> |
| <span class="sd"> impurity="variance", featureSubsetStrategy="all", validationTol=0.01, \</span> |
| <span class="sd"> validationIndicatorCol=None, leafCol="", minWeightFractionPerNode=0.0, \</span> |
| <span class="sd"> weightCol=None)</span> |
| <span class="sd"> """</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">GBTRegressor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.regression.GBTRegressor"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="GBTRegressor.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setParams">[docs]</a> <span class="nd">@keyword_only</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">maxDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">,</span> |
| <span class="n">maxBins</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">32</span><span class="p">,</span> |
| <span class="n">minInstancesPerNode</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">,</span> |
| <span class="n">minInfoGain</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">maxMemoryInMB</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">256</span><span class="p">,</span> |
| <span class="n">cacheNodeIds</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> |
| <span class="n">subsamplingRate</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> |
| <span class="n">checkpointInterval</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span> |
| <span class="n">lossType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"squared"</span><span class="p">,</span> |
| <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">20</span><span class="p">,</span> |
| <span class="n">stepSize</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.1</span><span class="p">,</span> |
| <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">impurity</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"variance"</span><span class="p">,</span> |
| <span class="n">featureSubsetStrategy</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"all"</span><span class="p">,</span> |
| <span class="n">validationTol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.01</span><span class="p">,</span> |
| <span class="n">validationIndicatorCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">leafCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">""</span><span class="p">,</span> |
| <span class="n">minWeightFractionPerNode</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \</span> |
| <span class="sd"> maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, \</span> |
| <span class="sd"> checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, \</span> |
| <span class="sd"> impurity="variance", featureSubsetStrategy="all", validationTol=0.01, \</span> |
| <span class="sd"> validationIndicatorCol=None, leafCol="", minWeightFractionPerNode=0.0, \</span> |
| <span class="sd"> weightCol=None)</span> |
| <span class="sd"> Sets params for Gradient Boosted Tree Regression.</span> |
| <span class="sd"> """</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> |
| |
| <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressionModel"</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">GBTRegressionModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="GBTRegressor.setMaxDepth"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setMaxDepth">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMaxDepth</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxDepth`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxDepth</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setMaxBins"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setMaxBins">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMaxBins</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxBins`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxBins</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setMinInstancesPerNode"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setMinInstancesPerNode">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMinInstancesPerNode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`minInstancesPerNode`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minInstancesPerNode</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setMinInfoGain"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setMinInfoGain">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMinInfoGain</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`minInfoGain`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minInfoGain</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setMaxMemoryInMB"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setMaxMemoryInMB">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMaxMemoryInMB</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxMemoryInMB`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxMemoryInMB</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setCacheNodeIds"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setCacheNodeIds">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setCacheNodeIds</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`cacheNodeIds`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">cacheNodeIds</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setImpurity"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setImpurity">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setImpurity</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`impurity`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">impurity</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setLossType"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setLossType">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setLossType</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`lossType`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">lossType</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setSubsamplingRate"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setSubsamplingRate">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setSubsamplingRate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`subsamplingRate`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">subsamplingRate</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setFeatureSubsetStrategy"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setFeatureSubsetStrategy">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setFeatureSubsetStrategy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`featureSubsetStrategy`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featureSubsetStrategy</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setValidationIndicatorCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setValidationIndicatorCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setValidationIndicatorCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`validationIndicatorCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">validationIndicatorCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setMaxIter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setMaxIter">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMaxIter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxIter`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setCheckpointInterval"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setCheckpointInterval">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setCheckpointInterval</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`checkpointInterval`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">checkpointInterval</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setSeed"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setSeed">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setSeed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`seed`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setStepSize"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setStepSize">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.4.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setStepSize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`stepSize`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">stepSize</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setWeightCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setWeightCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setWeightCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`weightCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">weightCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GBTRegressor.setMinWeightFractionPerNode"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressor.html#pyspark.ml.regression.GBTRegressor.setMinWeightFractionPerNode">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMinWeightFractionPerNode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GBTRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`minWeightFractionPerNode`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">minWeightFractionPerNode</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> |
| |
| |
| <div class="viewcode-block" id="GBTRegressionModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressionModel.html#pyspark.ml.regression.GBTRegressionModel">[docs]</a><span class="k">class</span> <span class="nc">GBTRegressionModel</span><span class="p">(</span> |
| <span class="n">_JavaRegressionModel</span><span class="p">[</span><span class="n">Vector</span><span class="p">],</span> |
| <span class="n">_TreeEnsembleModel</span><span class="p">,</span> |
| <span class="n">_GBTRegressorParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"GBTRegressionModel"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model fitted by :class:`GBTRegressor`.</span> |
| |
| <span class="sd"> .. versionadded:: 1.4.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="nd">@property</span> |
| <span class="k">def</span> <span class="nf">featureImportances</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Estimate of the importance of each feature.</span> |
| |
| <span class="sd"> Each feature's importance is the average of its importance across all trees in the ensemble.</span> |
| <span class="sd"> The importance vector is normalized to sum to 1. This method is suggested by Hastie et al.</span> |
| <span class="sd"> (Hastie, Tibshirani, Friedman. "The Elements of Statistical Learning, 2nd Edition." 2001.)</span> |
| <span class="sd"> and follows the implementation from scikit-learn.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| |
| <span class="sd"> See Also</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> DecisionTreeRegressionModel.featureImportances</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"featureImportances"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">trees</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="n">DecisionTreeRegressionModel</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""Trees in this ensemble. Warning: These have null parent Estimators."""</span> |
| <span class="k">return</span> <span class="p">[</span><span class="n">DecisionTreeRegressionModel</span><span class="p">(</span><span class="n">m</span><span class="p">)</span> <span class="k">for</span> <span class="n">m</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"trees"</span><span class="p">))]</span> |
| |
| <div class="viewcode-block" id="GBTRegressionModel.evaluateEachIteration"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GBTRegressionModel.html#pyspark.ml.regression.GBTRegressionModel.evaluateEachIteration">[docs]</a> <span class="k">def</span> <span class="nf">evaluateEachIteration</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">loss</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Method to compute error or loss for every iteration of gradient boosting.</span> |
| |
| <span class="sd"> .. versionadded:: 2.4.0</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> dataset : :py:class:`pyspark.sql.DataFrame`</span> |
| <span class="sd"> Test dataset to evaluate model on, where dataset is an</span> |
| <span class="sd"> instance of :py:class:`pyspark.sql.DataFrame`</span> |
| <span class="sd"> loss : str</span> |
| <span class="sd"> The loss function used to compute error.</span> |
| <span class="sd"> Supported options: squared, absolute</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"evaluateEachIteration"</span><span class="p">,</span> <span class="n">dataset</span><span class="p">,</span> <span class="n">loss</span><span class="p">)</span></div></div> |
| |
| |
| <span class="k">class</span> <span class="nc">_AFTSurvivalRegressionParams</span><span class="p">(</span> |
| <span class="n">_PredictorParams</span><span class="p">,</span> <span class="n">HasMaxIter</span><span class="p">,</span> <span class="n">HasTol</span><span class="p">,</span> <span class="n">HasFitIntercept</span><span class="p">,</span> <span class="n">HasAggregationDepth</span><span class="p">,</span> <span class="n">HasMaxBlockSizeInMB</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Params for :py:class:`AFTSurvivalRegression` and :py:class:`AFTSurvivalRegressionModel`.</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">censorCol</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"censorCol"</span><span class="p">,</span> |
| <span class="s2">"censor column name. The value of this column could be 0 or 1. "</span> |
| <span class="o">+</span> <span class="s2">"If the value is 1, it means the event has occurred i.e. "</span> |
| <span class="o">+</span> <span class="s2">"uncensored; otherwise censored."</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="n">quantileProbabilities</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"quantileProbabilities"</span><span class="p">,</span> |
| <span class="s2">"quantile probabilities array. Values of the quantile probabilities array "</span> |
| <span class="o">+</span> <span class="s2">"should be in the range (0, 1) and the array should be non-empty."</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toListFloat</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="n">quantilesCol</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"quantilesCol"</span><span class="p">,</span> |
| <span class="s2">"quantiles column name. This column will output quantiles of "</span> |
| <span class="o">+</span> <span class="s2">"corresponding quantileProbabilities if it is set."</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">_AFTSurvivalRegressionParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> |
| <span class="n">censorCol</span><span class="o">=</span><span class="s2">"censor"</span><span class="p">,</span> |
| <span class="n">quantileProbabilities</span><span class="o">=</span><span class="p">[</span><span class="mf">0.01</span><span class="p">,</span> <span class="mf">0.05</span><span class="p">,</span> <span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.25</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.75</span><span class="p">,</span> <span class="mf">0.9</span><span class="p">,</span> <span class="mf">0.95</span><span class="p">,</span> <span class="mf">0.99</span><span class="p">],</span> |
| <span class="n">maxIter</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> |
| <span class="n">tol</span><span class="o">=</span><span class="mf">1e-6</span><span class="p">,</span> |
| <span class="n">maxBlockSizeInMB</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getCensorCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of censorCol or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">censorCol</span><span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getQuantileProbabilities</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of quantileProbabilities or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quantileProbabilities</span><span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getQuantilesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of quantilesCol or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">quantilesCol</span><span class="p">)</span> |
| |
| |
| <div class="viewcode-block" id="AFTSurvivalRegression"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegression.html#pyspark.ml.regression.AFTSurvivalRegression">[docs]</a><span class="nd">@inherit_doc</span> |
| <span class="k">class</span> <span class="nc">AFTSurvivalRegression</span><span class="p">(</span> |
| <span class="n">_JavaRegressor</span><span class="p">[</span><span class="s2">"AFTSurvivalRegressionModel"</span><span class="p">],</span> |
| <span class="n">_AFTSurvivalRegressionParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"AFTSurvivalRegression"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Accelerated Failure Time (AFT) Model Survival Regression</span> |
| |
| <span class="sd"> Fit a parametric AFT survival regression model based on the Weibull distribution</span> |
| <span class="sd"> of the survival time.</span> |
| |
| <span class="sd"> Notes</span> |
| <span class="sd"> -----</span> |
| <span class="sd"> For more information, see the Wikipedia page on the</span> |
| <span class="sd"> `AFT Model &lt;https://en.wikipedia.org/wiki/Accelerated_failure_time_model&gt;`_</span> |
| |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.ml.linalg import Vectors</span> |
| <span class="sd"> >>> df = spark.createDataFrame([</span> |
| <span class="sd"> ... (1.0, Vectors.dense(1.0), 1.0),</span> |
| <span class="sd"> ... (1e-40, Vectors.sparse(1, [], []), 0.0)], ["label", "features", "censor"])</span> |
| <span class="sd"> >>> aftsr = AFTSurvivalRegression()</span> |
| <span class="sd"> >>> aftsr.setMaxIter(10)</span> |
| <span class="sd"> AFTSurvivalRegression...</span> |
| <span class="sd"> >>> aftsr.getMaxIter()</span> |
| <span class="sd"> 10</span> |
| <span class="sd"> >>> aftsr.clear(aftsr.maxIter)</span> |
| <span class="sd"> >>> model = aftsr.fit(df)</span> |
| <span class="sd"> >>> model.getMaxBlockSizeInMB()</span> |
| <span class="sd"> 0.0</span> |
| <span class="sd"> >>> model.setFeaturesCol("features")</span> |
| <span class="sd"> AFTSurvivalRegressionModel...</span> |
| <span class="sd"> >>> model.predict(Vectors.dense(6.3))</span> |
| <span class="sd"> 1.0</span> |
| <span class="sd"> >>> model.predictQuantiles(Vectors.dense(6.3))</span> |
| <span class="sd"> DenseVector([0.0101, 0.0513, 0.1054, 0.2877, 0.6931, 1.3863, 2.3026, 2.9957, 4.6052])</span> |
| <span class="sd"> >>> model.transform(df).show()</span> |
| <span class="sd"> +-------+---------+------+----------+</span> |
| <span class="sd"> | label| features|censor|prediction|</span> |
| <span class="sd"> +-------+---------+------+----------+</span> |
| <span class="sd"> | 1.0| [1.0]| 1.0| 1.0|</span> |
| <span class="sd"> |1.0E-40|(1,[],[])| 0.0| 1.0|</span> |
| <span class="sd"> +-------+---------+------+----------+</span> |
| <span class="sd"> ...</span> |
| <span class="sd"> >>> aftsr_path = temp_path + "/aftsr"</span> |
| <span class="sd"> >>> aftsr.save(aftsr_path)</span> |
| <span class="sd"> >>> aftsr2 = AFTSurvivalRegression.load(aftsr_path)</span> |
| <span class="sd"> >>> aftsr2.getMaxIter()</span> |
| <span class="sd"> 100</span> |
| <span class="sd"> >>> model_path = temp_path + "/aftsr_model"</span> |
| <span class="sd"> >>> model.save(model_path)</span> |
| <span class="sd"> >>> model2 = AFTSurvivalRegressionModel.load(model_path)</span> |
| <span class="sd"> >>> model.coefficients == model2.coefficients</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.intercept == model2.intercept</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.scale == model2.scale</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.transform(df).take(1) == model2.transform(df).take(1)</span> |
| <span class="sd"> True</span> |
| |
| <span class="sd"> .. versionadded:: 1.6.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> |
| |
| <span class="nd">@keyword_only</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">fitIntercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span> |
| <span class="n">tol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1e-6</span><span class="p">,</span> |
| <span class="n">censorCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"censor"</span><span class="p">,</span> |
| <span class="n">quantileProbabilities</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span> |
| <span class="mf">0.01</span><span class="p">,</span> |
| <span class="mf">0.05</span><span class="p">,</span> |
| <span class="mf">0.1</span><span class="p">,</span> |
| <span class="mf">0.25</span><span class="p">,</span> |
| <span class="mf">0.5</span><span class="p">,</span> |
| <span class="mf">0.75</span><span class="p">,</span> |
| <span class="mf">0.9</span><span class="p">,</span> |
| <span class="mf">0.95</span><span class="p">,</span> |
| <span class="mf">0.99</span><span class="p">,</span> |
| <span class="p">],</span> <span class="c1"># noqa: B005</span> |
| <span class="n">quantilesCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">aggregationDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> |
| <span class="n">maxBlockSizeInMB</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor", \</span> |
| <span class="sd"> quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], \</span> |
| <span class="sd"> quantilesCol=None, aggregationDepth=2, maxBlockSizeInMB=0.0)</span> |
| <span class="sd"> """</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">AFTSurvivalRegression</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> |
| <span class="s2">"org.apache.spark.ml.regression.AFTSurvivalRegression"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> |
| <span class="p">)</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> |
| |
| <!-- Highlighted source of AFTSurvivalRegression.setParams; the back-link targets its API reference entry. --><div class="viewcode-block" id="AFTSurvivalRegression.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegression.html#pyspark.ml.regression.AFTSurvivalRegression.setParams" aria-label="[docs] AFTSurvivalRegression.setParams">[docs]</a> <span class="nd">@keyword_only</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">fitIntercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span> |
| <span class="n">tol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1e-6</span><span class="p">,</span> |
| <span class="n">censorCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"censor"</span><span class="p">,</span> |
| <span class="n">quantileProbabilities</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="p">[</span> |
| <span class="mf">0.01</span><span class="p">,</span> |
| <span class="mf">0.05</span><span class="p">,</span> |
| <span class="mf">0.1</span><span class="p">,</span> |
| <span class="mf">0.25</span><span class="p">,</span> |
| <span class="mf">0.5</span><span class="p">,</span> |
| <span class="mf">0.75</span><span class="p">,</span> |
| <span class="mf">0.9</span><span class="p">,</span> |
| <span class="mf">0.95</span><span class="p">,</span> |
| <span class="mf">0.99</span><span class="p">,</span> |
| <span class="p">],</span> <span class="c1"># noqa: B005</span> |
| <span class="n">quantilesCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">aggregationDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> |
| <span class="n">maxBlockSizeInMB</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"AFTSurvivalRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor", \</span> |
| <span class="sd"> quantileProbabilities=[0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99], \</span> |
| <span class="sd"> quantilesCol=None, aggregationDepth=2, maxBlockSizeInMB=0.0):</span> |
| <span class="sd"> """</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> |
| |
| <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"AFTSurvivalRegressionModel"</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">AFTSurvivalRegressionModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> |
| |
| <!-- Highlighted source of AFTSurvivalRegression.setCensorCol; the back-link targets its API reference entry. --><div class="viewcode-block" id="AFTSurvivalRegression.setCensorCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegression.html#pyspark.ml.regression.AFTSurvivalRegression.setCensorCol" aria-label="[docs] AFTSurvivalRegression.setCensorCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setCensorCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"AFTSurvivalRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`censorCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">censorCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted source of AFTSurvivalRegression.setQuantileProbabilities; the back-link targets its API reference entry. --><div class="viewcode-block" id="AFTSurvivalRegression.setQuantileProbabilities"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegression.html#pyspark.ml.regression.AFTSurvivalRegression.setQuantileProbabilities" aria-label="[docs] AFTSurvivalRegression.setQuantileProbabilities">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setQuantileProbabilities</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"AFTSurvivalRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`quantileProbabilities`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">quantileProbabilities</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted source of AFTSurvivalRegression.setQuantilesCol; the back-link targets its API reference entry. --><div class="viewcode-block" id="AFTSurvivalRegression.setQuantilesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegression.html#pyspark.ml.regression.AFTSurvivalRegression.setQuantilesCol" aria-label="[docs] AFTSurvivalRegression.setQuantilesCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setQuantilesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"AFTSurvivalRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`quantilesCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">quantilesCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted source of AFTSurvivalRegression.setMaxIter; the back-link targets its API reference entry. --><div class="viewcode-block" id="AFTSurvivalRegression.setMaxIter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegression.html#pyspark.ml.regression.AFTSurvivalRegression.setMaxIter" aria-label="[docs] AFTSurvivalRegression.setMaxIter">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMaxIter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"AFTSurvivalRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxIter`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted source of AFTSurvivalRegression.setTol; the back-link targets its API reference entry. --><div class="viewcode-block" id="AFTSurvivalRegression.setTol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegression.html#pyspark.ml.regression.AFTSurvivalRegression.setTol" aria-label="[docs] AFTSurvivalRegression.setTol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setTol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"AFTSurvivalRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`tol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">tol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted source of AFTSurvivalRegression.setFitIntercept; the back-link targets its API reference entry. --><div class="viewcode-block" id="AFTSurvivalRegression.setFitIntercept"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegression.html#pyspark.ml.regression.AFTSurvivalRegression.setFitIntercept" aria-label="[docs] AFTSurvivalRegression.setFitIntercept">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setFitIntercept</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"AFTSurvivalRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`fitIntercept`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">fitIntercept</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted source of AFTSurvivalRegression.setAggregationDepth; the back-link targets its API reference entry. --><div class="viewcode-block" id="AFTSurvivalRegression.setAggregationDepth"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegression.html#pyspark.ml.regression.AFTSurvivalRegression.setAggregationDepth" aria-label="[docs] AFTSurvivalRegression.setAggregationDepth">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.1.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setAggregationDepth</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"AFTSurvivalRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`aggregationDepth`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">aggregationDepth</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <!-- Highlighted source of AFTSurvivalRegression.setMaxBlockSizeInMB; the back-link targets its API reference entry. The final row also closes the enclosing class block. --><div class="viewcode-block" id="AFTSurvivalRegression.setMaxBlockSizeInMB"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegression.html#pyspark.ml.regression.AFTSurvivalRegression.setMaxBlockSizeInMB" aria-label="[docs] AFTSurvivalRegression.setMaxBlockSizeInMB">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.1.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMaxBlockSizeInMB</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"AFTSurvivalRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxBlockSizeInMB`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxBlockSizeInMB</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> |
| |
| |
| <!-- Highlighted source of class AFTSurvivalRegressionModel; nested viewcode blocks wrap its documented methods, and each back-link targets the matching API reference entry. --><div class="viewcode-block" id="AFTSurvivalRegressionModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegressionModel.html#pyspark.ml.regression.AFTSurvivalRegressionModel" aria-label="[docs] AFTSurvivalRegressionModel">[docs]</a><span class="k">class</span> <span class="nc">AFTSurvivalRegressionModel</span><span class="p">(</span> |
| <span class="n">_JavaRegressionModel</span><span class="p">[</span><span class="n">Vector</span><span class="p">],</span> |
| <span class="n">_AFTSurvivalRegressionParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"AFTSurvivalRegressionModel"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model fitted by :class:`AFTSurvivalRegression`.</span> |
| |
| <span class="sd"> .. versionadded:: 1.6.0</span> |
| <span class="sd"> """</span> |
| |
| <div class="viewcode-block" id="AFTSurvivalRegressionModel.setQuantileProbabilities"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegressionModel.html#pyspark.ml.regression.AFTSurvivalRegressionModel.setQuantileProbabilities" aria-label="[docs] AFTSurvivalRegressionModel.setQuantileProbabilities">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setQuantileProbabilities</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">])</span> <span class="o">-></span> <span class="s2">"AFTSurvivalRegressionModel"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`quantileProbabilities`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">quantileProbabilities</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="AFTSurvivalRegressionModel.setQuantilesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegressionModel.html#pyspark.ml.regression.AFTSurvivalRegressionModel.setQuantilesCol" aria-label="[docs] AFTSurvivalRegressionModel.setQuantilesCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setQuantilesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"AFTSurvivalRegressionModel"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`quantilesCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">quantilesCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">coefficients</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model coefficients.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"coefficients"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">intercept</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model intercept.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"intercept"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"1.6.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">scale</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model scale parameter.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"scale"</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="AFTSurvivalRegressionModel.predictQuantiles"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.AFTSurvivalRegressionModel.html#pyspark.ml.regression.AFTSurvivalRegressionModel.predictQuantiles" aria-label="[docs] AFTSurvivalRegressionModel.predictQuantiles">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">predictQuantiles</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">features</span><span class="p">:</span> <span class="n">Vector</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Predicted Quantiles</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"predictQuantiles"</span><span class="p">,</span> <span class="n">features</span><span class="p">)</span></div></div> |
| |
| |
| <span class="k">class</span> <span class="nc">_GeneralizedLinearRegressionParams</span><span class="p">(</span> |
| <span class="n">_PredictorParams</span><span class="p">,</span> |
| <span class="n">HasFitIntercept</span><span class="p">,</span> |
| <span class="n">HasMaxIter</span><span class="p">,</span> |
| <span class="n">HasTol</span><span class="p">,</span> |
| <span class="n">HasRegParam</span><span class="p">,</span> |
| <span class="n">HasWeightCol</span><span class="p">,</span> |
| <span class="n">HasSolver</span><span class="p">,</span> |
| <span class="n">HasAggregationDepth</span><span class="p">,</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Params for :py:class:`GeneralizedLinearRegression` and</span> |
| <span class="sd"> :py:class:`GeneralizedLinearRegressionModel`.</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">family</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"family"</span><span class="p">,</span> |
| <span class="s2">"The name of family which is a description of "</span> |
| <span class="o">+</span> <span class="s2">"the error distribution to be used in the model. Supported options: "</span> |
| <span class="o">+</span> <span class="s2">"gaussian (default), binomial, poisson, gamma and tweedie."</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="n">link</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"link"</span><span class="p">,</span> |
| <span class="s2">"The name of link function which provides the "</span> |
| <span class="o">+</span> <span class="s2">"relationship between the linear predictor and the mean of the distribution "</span> |
| <span class="o">+</span> <span class="s2">"function. Supported options: identity, log, inverse, logit, probit, cloglog "</span> |
| <span class="o">+</span> <span class="s2">"and sqrt."</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="n">linkPredictionCol</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"linkPredictionCol"</span><span class="p">,</span> |
| <span class="s2">"link prediction (linear "</span> <span class="o">+</span> <span class="s2">"predictor) column name"</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="n">variancePower</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"variancePower"</span><span class="p">,</span> |
| <span class="s2">"The power in the variance function "</span> |
| <span class="o">+</span> <span class="s2">"of the Tweedie distribution which characterizes the relationship "</span> |
| <span class="o">+</span> <span class="s2">"between the variance and mean of the distribution. Only applicable "</span> |
| <span class="o">+</span> <span class="s2">"for the Tweedie family. Supported values: 0 and [1, Inf)."</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="n">linkPower</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"linkPower"</span><span class="p">,</span> |
| <span class="s2">"The index in the power link function. "</span> <span class="o">+</span> <span class="s2">"Only applicable to the Tweedie family."</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="n">solver</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"solver"</span><span class="p">,</span> |
| <span class="s2">"The solver algorithm for optimization. Supported "</span> <span class="o">+</span> <span class="s2">"options: irls."</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> |
| <span class="p">)</span> |
| <span class="n">offsetCol</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"offsetCol"</span><span class="p">,</span> |
| <span class="s2">"The offset column name. If this is not set "</span> |
| <span class="o">+</span> <span class="s2">"or empty, we treat all instance offsets as 0.0"</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">_GeneralizedLinearRegressionParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> |
| <span class="n">family</span><span class="o">=</span><span class="s2">"gaussian"</span><span class="p">,</span> |
| <span class="n">maxIter</span><span class="o">=</span><span class="mi">25</span><span class="p">,</span> |
| <span class="n">tol</span><span class="o">=</span><span class="mf">1e-6</span><span class="p">,</span> |
| <span class="n">regParam</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">solver</span><span class="o">=</span><span class="s2">"irls"</span><span class="p">,</span> |
| <span class="n">variancePower</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">aggregationDepth</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getFamily</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of family or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">family</span><span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getLinkPredictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of linkPredictionCol or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">linkPredictionCol</span><span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getLink</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of link or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">link</span><span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getVariancePower</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of variancePower or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">variancePower</span><span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getLinkPower</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of linkPower or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">linkPower</span><span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getOffsetCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of offsetCol or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">offsetCol</span><span class="p">)</span> |
| |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression">[docs]</a><span class="nd">@inherit_doc</span> |
| <span class="k">class</span> <span class="nc">GeneralizedLinearRegression</span><span class="p">(</span> |
| <span class="n">_JavaRegressor</span><span class="p">[</span><span class="s2">"GeneralizedLinearRegressionModel"</span><span class="p">],</span> |
| <span class="n">_GeneralizedLinearRegressionParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"GeneralizedLinearRegression"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Generalized Linear Regression.</span> |
| |
| <span class="sd"> Fit a Generalized Linear Model specified by giving a symbolic description of the linear</span> |
| <span class="sd"> predictor (link function) and a description of the error distribution (family). It supports</span> |
| <span class="sd"> "gaussian", "binomial", "poisson", "gamma" and "tweedie" as family. Valid link functions for</span> |
| <span class="sd"> each family are listed below. The first link function of each family is the default one.</span> |
| |
| <span class="sd"> * "gaussian" -> "identity", "log", "inverse"</span> |
| |
| <span class="sd"> * "binomial" -> "logit", "probit", "cloglog"</span> |
| |
| <span class="sd"> * "poisson" -> "log", "identity", "sqrt"</span> |
| |
| <span class="sd"> * "gamma" -> "inverse", "identity", "log"</span> |
| |
| <span class="sd"> * "tweedie" -> power link function specified through "linkPower". \</span> |
| <span class="sd"> The default link power in the tweedie family is 1 - variancePower.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| |
| <span class="sd"> Notes</span> |
| <span class="sd"> -----</span> |
| <span class="sd"> For more information see the Wikipedia page on</span> |
| <span class="sd"> `GLM &lt;https://en.wikipedia.org/wiki/Generalized_linear_model&gt;`_</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.ml.linalg import Vectors</span> |
| <span class="sd"> >>> df = spark.createDataFrame([</span> |
| <span class="sd"> ... (1.0, Vectors.dense(0.0, 0.0)),</span> |
| <span class="sd"> ... (1.0, Vectors.dense(1.0, 2.0)),</span> |
| <span class="sd"> ... (2.0, Vectors.dense(0.0, 0.0)),</span> |
| <span class="sd"> ... (2.0, Vectors.dense(1.0, 1.0)),], ["label", "features"])</span> |
| <span class="sd"> >>> glr = GeneralizedLinearRegression(family="gaussian", link="identity", linkPredictionCol="p")</span> |
| <span class="sd"> >>> glr.setRegParam(0.1)</span> |
| <span class="sd"> GeneralizedLinearRegression...</span> |
| <span class="sd"> >>> glr.getRegParam()</span> |
| <span class="sd"> 0.1</span> |
| <span class="sd"> >>> glr.clear(glr.regParam)</span> |
| <span class="sd"> >>> glr.setMaxIter(10)</span> |
| <span class="sd"> GeneralizedLinearRegression...</span> |
| <span class="sd"> >>> glr.getMaxIter()</span> |
| <span class="sd"> 10</span> |
| <span class="sd"> >>> glr.clear(glr.maxIter)</span> |
| <span class="sd"> >>> model = glr.fit(df)</span> |
| <span class="sd"> >>> model.setFeaturesCol("features")</span> |
| <span class="sd"> GeneralizedLinearRegressionModel...</span> |
| <span class="sd"> >>> model.getMaxIter()</span> |
| <span class="sd"> 25</span> |
| <span class="sd"> >>> model.getAggregationDepth()</span> |
| <span class="sd"> 2</span> |
| <span class="sd"> >>> transformed = model.transform(df)</span> |
| <span class="sd"> >>> abs(transformed.head().prediction - 1.5) &lt; 0.001</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> abs(transformed.head().p - 1.5) &lt; 0.001</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.coefficients</span> |
| <span class="sd"> DenseVector([1.5..., -1.0...])</span> |
| <span class="sd"> >>> model.numFeatures</span> |
| <span class="sd"> 2</span> |
| <span class="sd"> >>> abs(model.intercept - 1.5) &lt; 0.001</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> glr_path = temp_path + "/glr"</span> |
| <span class="sd"> >>> glr.save(glr_path)</span> |
| <span class="sd"> >>> glr2 = GeneralizedLinearRegression.load(glr_path)</span> |
| <span class="sd"> >>> glr.getFamily() == glr2.getFamily()</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model_path = temp_path + "/glr_model"</span> |
| <span class="sd"> >>> model.save(model_path)</span> |
| <span class="sd"> >>> model2 = GeneralizedLinearRegressionModel.load(model_path)</span> |
| <span class="sd"> >>> model.intercept == model2.intercept</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.coefficients[0] == model2.coefficients[0]</span> |
| <span class="sd"> True</span> |
| <span class="sd"> >>> model.transform(df).take(1) == model2.transform(df).take(1)</span> |
| <span class="sd"> True</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> |
| |
| <span class="nd">@keyword_only</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">family</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"gaussian"</span><span class="p">,</span> |
| <span class="n">link</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">fitIntercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">25</span><span class="p">,</span> |
| <span class="n">tol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1e-6</span><span class="p">,</span> |
| <span class="n">regParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">solver</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"irls"</span><span class="p">,</span> |
| <span class="n">linkPredictionCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">variancePower</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">linkPower</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">offsetCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">aggregationDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> __init__(self, \\*, labelCol="label", featuresCol="features", predictionCol="prediction", \</span> |
| <span class="sd"> family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6, \</span> |
| <span class="sd"> regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None, \</span> |
| <span class="sd"> variancePower=0.0, linkPower=None, offsetCol=None, aggregationDepth=2)</span> |
| <span class="sd"> """</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">GeneralizedLinearRegression</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span> |
| <span class="s2">"org.apache.spark.ml.regression.GeneralizedLinearRegression"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span> |
| <span class="p">)</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| |
| <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setParams">[docs]</a> <span class="nd">@keyword_only</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">family</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"gaussian"</span><span class="p">,</span> |
| <span class="n">link</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">fitIntercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">25</span><span class="p">,</span> |
| <span class="n">tol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1e-6</span><span class="p">,</span> |
| <span class="n">regParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">weightCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">solver</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"irls"</span><span class="p">,</span> |
| <span class="n">linkPredictionCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">variancePower</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">linkPower</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">offsetCol</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="n">aggregationDepth</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> setParams(self, \\*, labelCol="label", featuresCol="features", predictionCol="prediction", \</span> |
| <span class="sd"> family="gaussian", link=None, fitIntercept=True, maxIter=25, tol=1e-6, \</span> |
| <span class="sd"> regParam=0.0, weightCol=None, solver="irls", linkPredictionCol=None, \</span> |
| <span class="sd"> variancePower=0.0, linkPower=None, offsetCol=None, aggregationDepth=2)</span> |
| <span class="sd"> Sets params for generalized linear regression.</span> |
| <span class="sd"> """</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> |
| |
| <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegressionModel"</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">GeneralizedLinearRegressionModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setFamily"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setFamily">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setFamily</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`family`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">family</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setLinkPredictionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setLinkPredictionCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setLinkPredictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`linkPredictionCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">linkPredictionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setLink"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setLink">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setLink</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`link`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">link</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setVariancePower"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setVariancePower">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setVariancePower</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`variancePower`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">variancePower</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setLinkPower"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setLinkPower">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setLinkPower</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`linkPower`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">linkPower</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setOffsetCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setOffsetCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.3.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setOffsetCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`offsetCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">offsetCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setMaxIter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setMaxIter">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMaxIter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxIter`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setRegParam"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setRegParam">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setRegParam</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`regParam`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">regParam</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setTol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setTol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setTol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`tol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">tol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setFitIntercept"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setFitIntercept">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setFitIntercept</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`fitIntercept`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">fitIntercept</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setWeightCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setWeightCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setWeightCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`weightCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">weightCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setSolver"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setSolver">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setSolver</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`solver`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">solver</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegression.setAggregationDepth"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegression.html#pyspark.ml.regression.GeneralizedLinearRegression.setAggregationDepth">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setAggregationDepth</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegression"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`aggregationDepth`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">aggregationDepth</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> |
| |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegressionModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegressionModel.html#pyspark.ml.regression.GeneralizedLinearRegressionModel">[docs]</a><span class="k">class</span> <span class="nc">GeneralizedLinearRegressionModel</span><span class="p">(</span> |
| <span class="n">_JavaRegressionModel</span><span class="p">[</span><span class="n">Vector</span><span class="p">],</span> |
| <span class="n">_GeneralizedLinearRegressionParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"GeneralizedLinearRegressionModel"</span><span class="p">],</span> |
| <span class="n">HasTrainingSummary</span><span class="p">[</span><span class="s2">"GeneralizedLinearRegressionTrainingSummary"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model fitted by :class:`GeneralizedLinearRegression`.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| <span class="sd"> """</span> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegressionModel.setLinkPredictionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegressionModel.html#pyspark.ml.regression.GeneralizedLinearRegressionModel.setLinkPredictionCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setLinkPredictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegressionModel"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`linkPredictionCol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">linkPredictionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">coefficients</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model coefficients.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"coefficients"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">intercept</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model intercept.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"intercept"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">summary</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegressionTrainingSummary"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets summary (residuals, deviance, p-values) of model on</span> |
| <span class="sd"> training set. A RuntimeError is raised if no training</span> |
| <span class="sd"> summary is available (i.e. `hasSummary` is False).</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">hasSummary</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">GeneralizedLinearRegressionTrainingSummary</span><span class="p">(</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">GeneralizedLinearRegressionModel</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">summary</span> |
| <span class="p">)</span> |
| <span class="k">else</span><span class="p">:</span> |
| <span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span> |
| <span class="s2">"No training summary available for this </span><span class="si">%s</span><span class="s2">"</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span> |
| <span class="p">)</span> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegressionModel.evaluate"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegressionModel.html#pyspark.ml.regression.GeneralizedLinearRegressionModel.evaluate">[docs]</a> <span class="k">def</span> <span class="nf">evaluate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"GeneralizedLinearRegressionSummary"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Evaluates the model on a test dataset.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> dataset : :py:class:`pyspark.sql.DataFrame`</span> |
| <span class="sd"> Test dataset to evaluate model on, where dataset is an</span> |
| <span class="sd"> instance of :py:class:`pyspark.sql.DataFrame`</span> |
| <span class="sd"> """</span> |
| <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">):</span> |
| <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">"dataset must be a DataFrame but got </span><span class="si">%s</span><span class="s2">."</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">dataset</span><span class="p">))</span> |
| <span class="n">java_glr_summary</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"evaluate"</span><span class="p">,</span> <span class="n">dataset</span><span class="p">)</span> |
| <span class="k">return</span> <span class="n">GeneralizedLinearRegressionSummary</span><span class="p">(</span><span class="n">java_glr_summary</span><span class="p">)</span></div></div> |
| |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegressionSummary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegressionSummary.html#pyspark.ml.regression.GeneralizedLinearRegressionSummary">[docs]</a><span class="k">class</span> <span class="nc">GeneralizedLinearRegressionSummary</span><span class="p">(</span><span class="n">JavaWrapper</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Generalized linear regression results evaluated on a dataset.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">predictions</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Predictions output by the model's `transform` method.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"predictions"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">predictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Field in :py:attr:`predictions` which gives the predicted value of each instance.</span> |
| <span class="sd"> This is set to a new column name if the original model's `predictionCol` is not set.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"predictionCol"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.2.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">numInstances</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Number of instances in DataFrame predictions.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"numInstances"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">rank</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> The numeric rank of the fitted linear model.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"rank"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">degreesOfFreedom</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Degrees of freedom.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"degreesOfFreedom"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">residualDegreeOfFreedom</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> The residual degrees of freedom.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"residualDegreeOfFreedom"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">residualDegreeOfFreedomNull</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> The residual degrees of freedom for the null model.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"residualDegreeOfFreedomNull"</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegressionSummary.residuals"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegressionSummary.html#pyspark.ml.regression.GeneralizedLinearRegressionSummary.residuals">[docs]</a> <span class="k">def</span> <span class="nf">residuals</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">residualsType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"deviance"</span><span class="p">)</span> <span class="o">-></span> <span class="n">DataFrame</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Get the residuals of the fitted model by type.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| |
| <span class="sd"> Parameters</span> |
| <span class="sd"> ----------</span> |
| <span class="sd"> residualsType : str, optional</span> |
| <span class="sd"> The type of residuals which should be returned.</span> |
| <span class="sd"> Supported options: deviance (default), pearson, working, and response.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"residuals"</span><span class="p">,</span> <span class="n">residualsType</span><span class="p">)</span></div> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">nullDeviance</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> The deviance for the null model.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"nullDeviance"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">deviance</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> The deviance for the fitted model.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"deviance"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">dispersion</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> The dispersion of the fitted model.</span> |
| <span class="sd"> It is taken as 1.0 for the "binomial" and "poisson" families, and otherwise</span> |
| <span class="sd"> estimated by the residual Pearson's Chi-Squared statistic (which is defined as</span> |
| <span class="sd"> sum of the squares of the Pearson residuals) divided by the residual degrees of freedom.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"dispersion"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">aic</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Akaike's "An Information Criterion" (AIC) for the fitted model.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"aic"</span><span class="p">)</span></div> |
| |
| |
| <div class="viewcode-block" id="GeneralizedLinearRegressionTrainingSummary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.GeneralizedLinearRegressionTrainingSummary.html#pyspark.ml.regression.GeneralizedLinearRegressionTrainingSummary">[docs]</a><span class="nd">@inherit_doc</span> |
| <span class="k">class</span> <span class="nc">GeneralizedLinearRegressionTrainingSummary</span><span class="p">(</span><span class="n">GeneralizedLinearRegressionSummary</span><span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Generalized linear regression training results.</span> |
| |
| <span class="sd"> .. versionadded:: 2.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">numIterations</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Number of training iterations.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"numIterations"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">solver</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> The numeric solver used for training.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"solver"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">coefficientStandardErrors</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Standard error of estimated coefficients and intercept.</span> |
| |
| <span class="sd"> If :py:attr:`GeneralizedLinearRegression.fitIntercept` is set to True,</span> |
| <span class="sd"> then the last element returned corresponds to the intercept.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"coefficientStandardErrors"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">tValues</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> T-statistic of estimated coefficients and intercept.</span> |
| |
| <span class="sd"> If :py:attr:`GeneralizedLinearRegression.fitIntercept` is set to True,</span> |
| <span class="sd"> then the last element returned corresponds to the intercept.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"tValues"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"2.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">pValues</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Two-sided p-value of estimated coefficients and intercept.</span> |
| |
| <span class="sd"> If :py:attr:`GeneralizedLinearRegression.fitIntercept` is set to True,</span> |
| <span class="sd"> then the last element returned corresponds to the intercept.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"pValues"</span><span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"toString"</span><span class="p">)</span></div> |
| |
| |
| <span class="k">class</span> <span class="nc">_FactorizationMachinesParams</span><span class="p">(</span> |
| <span class="n">_PredictorParams</span><span class="p">,</span> |
| <span class="n">HasMaxIter</span><span class="p">,</span> |
| <span class="n">HasStepSize</span><span class="p">,</span> |
| <span class="n">HasTol</span><span class="p">,</span> |
| <span class="n">HasSolver</span><span class="p">,</span> |
| <span class="n">HasSeed</span><span class="p">,</span> |
| <span class="n">HasFitIntercept</span><span class="p">,</span> |
| <span class="n">HasRegParam</span><span class="p">,</span> |
| <span class="n">HasWeightCol</span><span class="p">,</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Params for :py:class:`FMRegressor`, :py:class:`FMRegressionModel`, :py:class:`FMClassifier`</span> |
| <span class="sd"> and :py:class:`FMClassificationModel`.</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">factorSize</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"factorSize"</span><span class="p">,</span> |
| <span class="s2">"Dimensionality of the factor vectors, "</span> |
| <span class="o">+</span> <span class="s2">"which are used to get pairwise interactions between variables"</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toInt</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="n">fitLinear</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"fitLinear"</span><span class="p">,</span> |
| <span class="s2">"whether to fit linear term (aka 1-way term)"</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toBoolean</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="n">miniBatchFraction</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"miniBatchFraction"</span><span class="p">,</span> |
| <span class="s2">"fraction of the input data "</span> |
| <span class="o">+</span> <span class="s2">"set that should be used for one iteration of gradient descent"</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="n">initStd</span><span class="p">:</span> <span class="n">Param</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"initStd"</span><span class="p">,</span> |
| <span class="s2">"standard deviation of initial coefficients"</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toFloat</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="n">solver</span> <span class="o">=</span> <span class="n">Param</span><span class="p">(</span> |
| <span class="n">Params</span><span class="o">.</span><span class="n">_dummy</span><span class="p">(),</span> |
| <span class="s2">"solver"</span><span class="p">,</span> |
| <span class="s2">"The solver algorithm for optimization. Supported "</span> <span class="o">+</span> <span class="s2">"options: gd, adamW. (Default adamW)"</span><span class="p">,</span> |
| <span class="n">typeConverter</span><span class="o">=</span><span class="n">TypeConverters</span><span class="o">.</span><span class="n">toString</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">):</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">_FactorizationMachinesParams</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">)</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span> |
| <span class="n">factorSize</span><span class="o">=</span><span class="mi">8</span><span class="p">,</span> |
| <span class="n">fitIntercept</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> |
| <span class="n">fitLinear</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> |
| <span class="n">regParam</span><span class="o">=</span><span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">miniBatchFraction</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span> |
| <span class="n">initStd</span><span class="o">=</span><span class="mf">0.01</span><span class="p">,</span> |
| <span class="n">maxIter</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> |
| <span class="n">stepSize</span><span class="o">=</span><span class="mf">1.0</span><span class="p">,</span> |
| <span class="n">tol</span><span class="o">=</span><span class="mf">1e-6</span><span class="p">,</span> |
| <span class="n">solver</span><span class="o">=</span><span class="s2">"adamW"</span><span class="p">,</span> |
| <span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getFactorSize</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of factorSize or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">factorSize</span><span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getFitLinear</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of fitLinear or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">fitLinear</span><span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getMiniBatchFraction</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of miniBatchFraction or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">miniBatchFraction</span><span class="p">)</span> |
| |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">getInitStd</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Gets the value of initStd or its default value.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOrDefault</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">initStd</span><span class="p">)</span> |
| |
| |
| <div class="viewcode-block" id="FMRegressor"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressor.html#pyspark.ml.regression.FMRegressor">[docs]</a><span class="nd">@inherit_doc</span> |
| <span class="k">class</span> <span class="nc">FMRegressor</span><span class="p">(</span> |
| <span class="n">_JavaRegressor</span><span class="p">[</span><span class="s2">"FMRegressionModel"</span><span class="p">],</span> |
| <span class="n">_FactorizationMachinesParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"FMRegressor"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Factorization Machines learning algorithm for regression.</span> |
| |
| <span class="sd"> solver Supports:</span> |
| |
| <span class="sd"> * gd (normal mini-batch gradient descent)</span> |
| <span class="sd"> * adamW (default)</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| |
| <span class="sd"> Examples</span> |
| <span class="sd"> --------</span> |
| <span class="sd"> >>> from pyspark.ml.linalg import Vectors</span> |
| <span class="sd"> >>> from pyspark.ml.regression import FMRegressor</span> |
| <span class="sd"> >>> df = spark.createDataFrame([</span> |
| <span class="sd"> ... (2.0, Vectors.dense(2.0)),</span> |
| <span class="sd"> ... (1.0, Vectors.dense(1.0)),</span> |
| <span class="sd"> ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"])</span> |
| <span class="sd"> >>></span> |
| <span class="sd"> >>> fm = FMRegressor(factorSize=2)</span> |
| <span class="sd"> >>> fm.setSeed(16)</span> |
| <span class="sd"> FMRegressor...</span> |
| <span class="sd"> >>> model = fm.fit(df)</span> |
| <span class="sd"> >>> model.getMaxIter()</span> |
| <span class="sd"> 100</span> |
| <span class="sd"> >>> test0 = spark.createDataFrame([</span> |
| <span class="sd"> ... (Vectors.dense(-2.0),),</span> |
| <span class="sd"> ... (Vectors.dense(0.5),),</span> |
| <span class="sd"> ... (Vectors.dense(1.0),),</span> |
| <span class="sd"> ... (Vectors.dense(4.0),)], ["features"])</span> |
| <span class="sd"> >>> model.transform(test0).show(10, False)</span> |
| <span class="sd"> +--------+-------------------+</span> |
| <span class="sd"> |features|prediction |</span> |
| <span class="sd"> +--------+-------------------+</span> |
| <span class="sd"> |[-2.0] |-1.9989237712341565|</span> |
| <span class="sd"> |[0.5] |0.4956682219523814 |</span> |
| <span class="sd"> |[1.0] |0.994586620589689 |</span> |
| <span class="sd"> |[4.0] |3.9880970124135344 |</span> |
| <span class="sd"> +--------+-------------------+</span> |
| <span class="sd"> ...</span> |
| <span class="sd"> >>> model.intercept</span> |
| <span class="sd"> -0.0032501766849261557</span> |
| <span class="sd"> >>> model.linear</span> |
| <span class="sd"> DenseVector([0.9978])</span> |
| <span class="sd"> >>> model.factors</span> |
| <span class="sd"> DenseMatrix(1, 2, [0.0173, 0.0021], 1)</span> |
| <span class="sd"> >>> model_path = temp_path + "/fm_model"</span> |
| <span class="sd"> >>> model.save(model_path)</span> |
| <span class="sd"> >>> model2 = FMRegressionModel.load(model_path)</span> |
| <span class="sd"> >>> model2.intercept</span> |
| <span class="sd"> -0.0032501766849261557</span> |
| <span class="sd"> >>> model2.linear</span> |
| <span class="sd"> DenseVector([0.9978])</span> |
| <span class="sd"> >>> model2.factors</span> |
| <span class="sd"> DenseMatrix(1, 2, [0.0173, 0.0021], 1)</span> |
| <span class="sd"> >>> model.transform(test0).take(1) == model2.transform(test0).take(1)</span> |
| <span class="sd"> True</span> |
| <span class="sd"> """</span> |
| |
| <span class="n">_input_kwargs</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> |
| |
| <span class="nd">@keyword_only</span> |
| <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">factorSize</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span> |
| <span class="n">fitIntercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">fitLinear</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">regParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">miniBatchFraction</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> |
| <span class="n">initStd</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.01</span><span class="p">,</span> |
| <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span> |
| <span class="n">stepSize</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> |
| <span class="n">tol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1e-6</span><span class="p">,</span> |
| <span class="n">solver</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"adamW"</span><span class="p">,</span> |
| <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> factorSize=8, fitIntercept=True, fitLinear=True, regParam=0.0, \</span> |
| <span class="sd"> miniBatchFraction=1.0, initStd=0.01, maxIter=100, stepSize=1.0, \</span> |
| <span class="sd"> tol=1e-6, solver="adamW", seed=None)</span> |
| <span class="sd"> """</span> |
| <span class="nb">super</span><span class="p">(</span><span class="n">FMRegressor</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">_java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">"org.apache.spark.ml.regression.FMRegressor"</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span><span class="p">)</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="bp">self</span><span class="o">.</span><span class="n">setParams</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="FMRegressor.setParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressor.html#pyspark.ml.regression.FMRegressor.setParams">[docs]</a> <span class="nd">@keyword_only</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setParams</span><span class="p">(</span> |
| <span class="bp">self</span><span class="p">,</span> |
| <span class="o">*</span><span class="p">,</span> |
| <span class="n">featuresCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"features"</span><span class="p">,</span> |
| <span class="n">labelCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"label"</span><span class="p">,</span> |
| <span class="n">predictionCol</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"prediction"</span><span class="p">,</span> |
| <span class="n">factorSize</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">8</span><span class="p">,</span> |
| <span class="n">fitIntercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">fitLinear</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> |
| <span class="n">regParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span> |
| <span class="n">miniBatchFraction</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> |
| <span class="n">initStd</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.01</span><span class="p">,</span> |
| <span class="n">maxIter</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span> |
| <span class="n">stepSize</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span> |
| <span class="n">tol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1e-6</span><span class="p">,</span> |
| <span class="n">solver</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">"adamW"</span><span class="p">,</span> |
| <span class="n">seed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> |
| <span class="p">)</span> <span class="o">-></span> <span class="s2">"FMRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \</span> |
| <span class="sd"> factorSize=8, fitIntercept=True, fitLinear=True, regParam=0.0, \</span> |
| <span class="sd"> miniBatchFraction=1.0, initStd=0.01, maxIter=100, stepSize=1.0, \</span> |
| <span class="sd"> tol=1e-6, solver="adamW", seed=None)</span> |
| <span class="sd"> Sets Params for FMRegressor.</span> |
| <span class="sd"> """</span> |
| <span class="n">kwargs</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_input_kwargs</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div> |
| |
| <span class="k">def</span> <span class="nf">_create_model</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">java_model</span><span class="p">:</span> <span class="s2">"JavaObject"</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FMRegressionModel"</span><span class="p">:</span> |
| <span class="k">return</span> <span class="n">FMRegressionModel</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span> |
| |
| <div class="viewcode-block" id="FMRegressor.setFactorSize"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressor.html#pyspark.ml.regression.FMRegressor.setFactorSize">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setFactorSize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FMRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`factorSize`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">factorSize</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="FMRegressor.setFitLinear"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressor.html#pyspark.ml.regression.FMRegressor.setFitLinear">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setFitLinear</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FMRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`fitLinear`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">fitLinear</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="FMRegressor.setMiniBatchFraction"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressor.html#pyspark.ml.regression.FMRegressor.setMiniBatchFraction">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMiniBatchFraction</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FMRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`miniBatchFraction`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">miniBatchFraction</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="FMRegressor.setInitStd"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressor.html#pyspark.ml.regression.FMRegressor.setInitStd">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setInitStd</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FMRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`initStd`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">initStd</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="FMRegressor.setMaxIter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressor.html#pyspark.ml.regression.FMRegressor.setMaxIter">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setMaxIter</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FMRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`maxIter`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">maxIter</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="FMRegressor.setStepSize"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressor.html#pyspark.ml.regression.FMRegressor.setStepSize">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setStepSize</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FMRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`stepSize`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">stepSize</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="FMRegressor.setTol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressor.html#pyspark.ml.regression.FMRegressor.setTol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setTol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FMRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`tol`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">tol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="FMRegressor.setSolver"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressor.html#pyspark.ml.regression.FMRegressor.setSolver">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setSolver</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FMRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`solver`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">solver</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="FMRegressor.setSeed"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressor.html#pyspark.ml.regression.FMRegressor.setSeed">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setSeed</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FMRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`seed`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">seed</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="FMRegressor.setFitIntercept"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressor.html#pyspark.ml.regression.FMRegressor.setFitIntercept">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setFitIntercept</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">bool</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FMRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`fitIntercept`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">fitIntercept</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div> |
| |
| <div class="viewcode-block" id="FMRegressor.setRegParam"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressor.html#pyspark.ml.regression.FMRegressor.setRegParam">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">setRegParam</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-></span> <span class="s2">"FMRegressor"</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Sets the value of :py:attr:`regParam`.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">regParam</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div> |
| |
| |
| <div class="viewcode-block" id="FMRegressionModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.regression.FMRegressionModel.html#pyspark.ml.regression.FMRegressionModel">[docs]</a><span class="k">class</span> <span class="nc">FMRegressionModel</span><span class="p">(</span> |
| <span class="n">_JavaRegressionModel</span><span class="p">,</span> |
| <span class="n">_FactorizationMachinesParams</span><span class="p">,</span> |
| <span class="n">JavaMLWritable</span><span class="p">,</span> |
| <span class="n">JavaMLReadable</span><span class="p">[</span><span class="s2">"FMRegressionModel"</span><span class="p">],</span> |
| <span class="p">):</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model fitted by :class:`FMRegressor`.</span> |
| |
| <span class="sd"> .. versionadded:: 3.0.0</span> |
| <span class="sd"> """</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">intercept</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="nb">float</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model intercept.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"intercept"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">linear</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Vector</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model linear term.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"linear"</span><span class="p">)</span> |
| |
| <span class="nd">@property</span> |
| <span class="nd">@since</span><span class="p">(</span><span class="s2">"3.0.0"</span><span class="p">)</span> |
| <span class="k">def</span> <span class="nf">factors</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-></span> <span class="n">Matrix</span><span class="p">:</span> |
| <span class="w"> </span><span class="sd">"""</span> |
| <span class="sd"> Model factor term.</span> |
| <span class="sd"> """</span> |
| <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">"factors"</span><span class="p">)</span></div> |
| |
| |
| <span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span> |
| <span class="kn">import</span> <span class="nn">doctest</span> |
| <span class="kn">import</span> <span class="nn">pyspark.ml.regression</span> |
| <span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span> |
| |
| <span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">ml</span><span class="o">.</span><span class="n">regression</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span> |
| <span class="c1"># The small batch size here ensures that we see multiple batches,</span> |
| <span class="c1"># even in these small test examples:</span> |
| <span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">"local[2]"</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">"ml.regression tests"</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span> |
| <span class="n">sc</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">sparkContext</span> |
| <span class="n">globs</span><span class="p">[</span><span class="s2">"sc"</span><span class="p">]</span> <span class="o">=</span> <span class="n">sc</span> |
| <span class="n">globs</span><span class="p">[</span><span class="s2">"spark"</span><span class="p">]</span> <span class="o">=</span> <span class="n">spark</span> |
| <span class="kn">import</span> <span class="nn">tempfile</span> |
| |
| <span class="n">temp_path</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">mkdtemp</span><span class="p">()</span> |
| <span class="n">globs</span><span class="p">[</span><span class="s2">"temp_path"</span><span class="p">]</span> <span class="o">=</span> <span class="n">temp_path</span> |
| <span class="k">try</span><span class="p">:</span> |
| <span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span><span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span> <span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span><span class="p">)</span> |
| <span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span> |
| <span class="k">finally</span><span class="p">:</span> |
| <span class="kn">from</span> <span class="nn">shutil</span> <span class="kn">import</span> <span class="n">rmtree</span> |
| |
| <span class="k">try</span><span class="p">:</span> |
| <span class="n">rmtree</span><span class="p">(</span><span class="n">temp_path</span><span class="p">)</span> |
| <span class="k">except</span> <span class="ne">OSError</span><span class="p">:</span> |
| <span class="k">pass</span> |
| <span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span> |
| <span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span> |
| </pre></div> |
| |
| </article> |
| |
| |
| |
| <footer class="bd-footer-article"> |
| |
| <div class="footer-article-items footer-article__inner"> |
| |
| <div class="footer-article-item"><!-- Previous / next buttons --> |
| <div class="prev-next-area"> |
| </div></div> |
| |
| </div> |
| |
| </footer> |
| |
| </div> |
| |
| |
| |
| |
| </div> |
| <footer class="bd-footer-content"> |
| |
| </footer> |
| |
| </main> |
| </div> |
| </div> |
| |
| <!-- Scripts loaded after <body> so the DOM is not blocked --> |
| <script src="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52"></script> |
| <script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52"></script> |
| |
| <footer class="bd-footer"> |
| <!-- Site-wide page footer: legal notice and build information. --> |
| <div class="bd-footer__inner bd-page-width"> |
| |
| <!-- Start group: copyright/licence notice followed by the Sphinx version. --> |
| <div class="footer-items__start"> |
| |
| <div class="footer-item"><p class="copyright"> |
| Copyright © 2024 The Apache Software Foundation, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>. |
| </p></div> |
| |
| <div class="footer-item"> |
| <p class="sphinx-version"> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.5.0. |
| <br/> |
| </p> |
| </div> |
| |
| </div> |
| |
| |
| <!-- End group: version of the theme used to build the page. --> |
| <div class="footer-items__end"> |
| |
| <div class="footer-item"><p class="theme-version"> |
| Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.13.3. |
| </p></div> |
| |
| </div> |
| |
| </div> |
| |
| </footer> |
| </body> |
| </html> |