<!-- blob: 0bf5d7d28b68353d230b96c4ea53f14b878e5632 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>pyspark.mllib.regression &#8212; PySpark 4.0.0-preview1 documentation</title>
<script data-cfasync="false">
// Copy the persisted "mode" and "theme" values from localStorage onto the
// root element's data-mode / data-theme attributes. When nothing is stored,
// mode falls back to "" and theme falls back to "light".
(function (rootDataset) {
  const storedMode = localStorage.getItem("mode");
  const storedTheme = localStorage.getItem("theme");
  rootDataset.mode = storedMode ? storedMode : "";
  rootDataset.theme = storedTheme ? storedTheme : "light";
})(document.documentElement.dataset);
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../../../_static/styles/theme.css?digest=e353d410970836974a52" rel="stylesheet" />
<link href="../../../_static/styles/bootstrap.css?digest=e353d410970836974a52" rel="stylesheet" />
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=e353d410970836974a52" rel="stylesheet" />
<link href="../../../_static/vendor/fontawesome/6.1.2/css/all.min.css?digest=e353d410970836974a52" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52" />
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
<script src="../../../_static/jquery.js"></script>
<script src="../../../_static/underscore.js"></script>
<script src="../../../_static/doctools.js"></script>
<script src="../../../_static/clipboard.min.js"></script>
<script src="../../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script>DOCUMENTATION_OPTIONS.pagename = '_modules/pyspark/mllib/regression';</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/_modules/pyspark/mllib/regression.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="None">
<!-- Matomo -->
<script type="text/javascript">
// Matomo command queue: reuse the one already on the page, or start a new one.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(["disableCookies"]);
_paq.push(["trackPageView"]);
_paq.push(["enableLinkTracking"]);
(function () {
  var trackerBase = "https://analytics.apache.org/";
  _paq.push(["setTrackerUrl", trackerBase + "matomo.php"]);
  _paq.push(["setSiteId", "40"]);
  // Load matomo.js asynchronously by inserting a new <script> element
  // immediately before the first <script> in the document.
  var loader = document.createElement("script");
  var firstScript = document.getElementsByTagName("script")[0];
  loader.async = true;
  loader.src = trackerBase + "matomo.js";
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a class="skip-link" href="#main-content">Skip to main content</a>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../../../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<nav class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../../../index.html">
<img src="../../../_static/spark-logo-light.png" class="logo__image only-light" alt="Logo image"/>
<script>document.write(`<img src="../../../_static/spark-logo-dark.png" class="logo__image only-dark" alt="Logo image"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item"><nav class="navbar-nav">
<p class="sidebar-header-items__title"
role="heading"
aria-level="1"
aria-label="Site Navigation">
Site Navigation
</p>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../index.html">
Overview
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../getting_started/index.html">
Getting Started
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../user_guide/index.html">
User Guides
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../reference/index.html">
API Reference
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../development/index.html">
Development
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../migration_guide/index.html">
Migration Guides
</a>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
// Emit the search button markup from script, so the button is only present
// in the page when JavaScript actually runs.
// NOTE(review): presumably because the search overlay it opens is
// script-driven -- confirm against the theme.
document.write(`
<button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
</button>
`);
</script>
</div>
<div class="navbar-item"><!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<div id="version-button" class="dropdown">
<button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown">
4.0.0-preview1
<span class="caret"></span>
</button>
<div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
<script type="text/javascript">
// Build the docs homepage URL for one version entry by substituting
// entry.version into the "{version}" placeholder of the URL template.
function buildURL(entry) {
  const homepageTemplate = "https://spark.apache.org/docs/{version}/api/python/index.html"; // supplied by jinja
  return homepageTemplate.replace("{version}", entry.version);
}
// Click handler for a version-switcher link: check whether the current page
// path also exists in the selected docs version and, if so, go there instead
// of that version's homepage.
//
// event.target is the clicked <a>; its href attribute is the other version's
// homepage URL. Always returns false so the browser does not follow the link
// itself -- navigation happens in the AJAX callbacks below.
function checkPageExistsAndRedirect(event) {
  const currentFilePath = "_modules/pyspark/mllib/regression.html",
        otherDocsHomepage = event.target.getAttribute("href"),
        // The homepage URL ends in "index.html". Appending the page path to it
        // directly yields ".../index.html_modules/...", which never exists, so
        // the lookup would always fall back to the homepage. Strip the file
        // name to get the directory the page path is relative to.
        otherDocsRoot = otherDocsHomepage.replace(/index\.html$/, ""),
        tryUrl = `${otherDocsRoot}${currentFilePath}`;
  $.ajax({
    type: 'HEAD',
    url: tryUrl,
    // if the page exists, go there
    success: function() {
      location.href = tryUrl;
    }
  }).fail(function() {
    // otherwise land on the other version's homepage
    location.href = otherDocsHomepage;
  });
  return false;
}
// Populate the version switcher dropdown that belongs to this script.
//
// The version switcher markup (and this script) appears more than once on the
// page with the same element ids, and an id lookup only returns the first
// match. Resolve the dropdown relative to the running <script> instead, so
// this copy fills its own menu rather than appending to the first one.
(function () {
  // document.currentScript is only set while the script executes
  // synchronously, so capture the container before the async JSON callback.
  const ownScript = document.currentScript;
  const dropdown =
    (ownScript && ownScript.parentNode &&
      ownScript.parentNode.querySelector("#version_switcher")) ||
    document.getElementById("version_switcher");
  if (!dropdown) {
    // nothing to populate
    return;
  }
  // get JSON config
  $.getJSON("https://spark.apache.org/static/versions.json", function(data, textStatus, jqXHR) {
    // create the nodes first (before AJAX calls) to ensure the order is
    // correct (for now, links will go to doc version homepage)
    $.each(data, function(index, entry) {
      // if no custom name specified (e.g., "latest"), use version string
      if (!("name" in entry)) {
        entry.name = entry.version;
      }
      // construct the appropriate URL, and add it to the dropdown
      entry.url = buildURL(entry);
      const node = document.createElement("a");
      node.setAttribute("class", "list-group-item list-group-item-action py-1");
      node.setAttribute("href", `${entry.url}`);
      node.textContent = `${entry.name}`;
      node.onclick = checkPageExistsAndRedirect;
      dropdown.appendChild(node);
    });
  });
})();
</script></div>
<div class="navbar-item">
<script>
// Emit the light/dark/auto theme switch button from script, so the button is
// only present in the page when JavaScript actually runs. The three spans
// carry one icon per data-mode value ("light", "dark", "auto").
document.write(`
<button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span>
<span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span>
<span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github"></i></span>
<label class="sr-only">GitHub</label></a>
</li>
<li class="nav-item">
<a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box"></i></span>
<label class="sr-only">PyPI</label></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
// Emit the search button markup from script, so the button is only present
// in the page when JavaScript actually runs.
// NOTE(review): presumably because the search overlay it opens is
// script-driven -- confirm against the theme.
document.write(`
<button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
</button>
`);
</script>
</div>
</div>
</nav>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar hide-on-wide">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item"><nav class="navbar-nav">
<p class="sidebar-header-items__title"
role="heading"
aria-level="1"
aria-label="Site Navigation">
Site Navigation
</p>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../index.html">
Overview
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../getting_started/index.html">
Getting Started
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../user_guide/index.html">
User Guides
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../reference/index.html">
API Reference
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../development/index.html">
Development
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../migration_guide/index.html">
Migration Guides
</a>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item"><!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<div id="version-button" class="dropdown">
<button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown">
4.0.0-preview1
<span class="caret"></span>
</button>
<div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
<script type="text/javascript">
// Build the docs homepage URL for one version entry by substituting
// entry.version into the "{version}" placeholder of the URL template.
function buildURL(entry) {
  const homepageTemplate = "https://spark.apache.org/docs/{version}/api/python/index.html"; // supplied by jinja
  return homepageTemplate.replace("{version}", entry.version);
}
// Click handler for a version-switcher link: check whether the current page
// path also exists in the selected docs version and, if so, go there instead
// of that version's homepage.
//
// event.target is the clicked <a>; its href attribute is the other version's
// homepage URL. Always returns false so the browser does not follow the link
// itself -- navigation happens in the AJAX callbacks below.
function checkPageExistsAndRedirect(event) {
  const currentFilePath = "_modules/pyspark/mllib/regression.html",
        otherDocsHomepage = event.target.getAttribute("href"),
        // The homepage URL ends in "index.html". Appending the page path to it
        // directly yields ".../index.html_modules/...", which never exists, so
        // the lookup would always fall back to the homepage. Strip the file
        // name to get the directory the page path is relative to.
        otherDocsRoot = otherDocsHomepage.replace(/index\.html$/, ""),
        tryUrl = `${otherDocsRoot}${currentFilePath}`;
  $.ajax({
    type: 'HEAD',
    url: tryUrl,
    // if the page exists, go there
    success: function() {
      location.href = tryUrl;
    }
  }).fail(function() {
    // otherwise land on the other version's homepage
    location.href = otherDocsHomepage;
  });
  return false;
}
// Populate the version switcher dropdown that belongs to this script.
//
// The version switcher markup (and this script) appears more than once on the
// page with the same element ids, and an id lookup only returns the first
// match. Resolve the dropdown relative to the running <script> instead, so
// this copy fills its own menu rather than appending to the first one.
(function () {
  // document.currentScript is only set while the script executes
  // synchronously, so capture the container before the async JSON callback.
  const ownScript = document.currentScript;
  const dropdown =
    (ownScript && ownScript.parentNode &&
      ownScript.parentNode.querySelector("#version_switcher")) ||
    document.getElementById("version_switcher");
  if (!dropdown) {
    // nothing to populate
    return;
  }
  // get JSON config
  $.getJSON("https://spark.apache.org/static/versions.json", function(data, textStatus, jqXHR) {
    // create the nodes first (before AJAX calls) to ensure the order is
    // correct (for now, links will go to doc version homepage)
    $.each(data, function(index, entry) {
      // if no custom name specified (e.g., "latest"), use version string
      if (!("name" in entry)) {
        entry.name = entry.version;
      }
      // construct the appropriate URL, and add it to the dropdown
      entry.url = buildURL(entry);
      const node = document.createElement("a");
      node.setAttribute("class", "list-group-item list-group-item-action py-1");
      node.setAttribute("href", `${entry.url}`);
      node.textContent = `${entry.name}`;
      node.onclick = checkPageExistsAndRedirect;
      dropdown.appendChild(node);
    });
  });
})();
</script></div>
<div class="navbar-item">
<script>
// Emit the light/dark/auto theme switch button from script, so the button is
// only present in the page when JavaScript actually runs. The three spans
// carry one icon per data-mode value ("light", "dark", "auto").
document.write(`
<button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span>
<span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span>
<span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github"></i></span>
<label class="sr-only">GitHub</label></a>
</li>
<li class="nav-item">
<a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box"></i></span>
<label class="sr-only">PyPI</label></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumbs">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../../../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="../../index.html" class="nav-link">Module code</a></li>
<li class="breadcrumb-item active" aria-current="page">pyspark.mllib.regression</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article" role="main">
<h1>Source code for pyspark.mllib.regression</h1><div class="highlight"><pre>
<span></span><span class="c1">#</span>
<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
<span class="c1"># this work for additional information regarding copyright ownership.</span>
<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
<span class="c1"># (the &quot;License&quot;); you may not use this file except in compliance with</span>
<span class="c1"># the License. You may obtain a copy of the License at</span>
<span class="c1">#</span>
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="c1">#</span>
<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
<span class="c1"># distributed under the License is distributed on an &quot;AS IS&quot; BASIS,</span>
<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
<span class="c1"># See the License for the specific language governing permissions and</span>
<span class="c1"># limitations under the License.</span>
<span class="c1">#</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">warnings</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">Any</span><span class="p">,</span>
<span class="n">Callable</span><span class="p">,</span>
<span class="n">Iterable</span><span class="p">,</span>
<span class="n">Optional</span><span class="p">,</span>
<span class="n">Tuple</span><span class="p">,</span>
<span class="n">Type</span><span class="p">,</span>
<span class="n">TypeVar</span><span class="p">,</span>
<span class="n">Union</span><span class="p">,</span>
<span class="n">overload</span><span class="p">,</span>
<span class="n">TYPE_CHECKING</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">RDD</span><span class="p">,</span> <span class="n">since</span>
<span class="kn">from</span> <span class="nn">pyspark.streaming.dstream</span> <span class="kn">import</span> <span class="n">DStream</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib.common</span> <span class="kn">import</span> <span class="n">callMLlibFunc</span><span class="p">,</span> <span class="n">_py2java</span><span class="p">,</span> <span class="n">_java2py</span><span class="p">,</span> <span class="n">inherit_doc</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib.linalg</span> <span class="kn">import</span> <span class="n">_convert_to_vector</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib.util</span> <span class="kn">import</span> <span class="n">Saveable</span><span class="p">,</span> <span class="n">Loader</span>
<span class="kn">from</span> <span class="nn">pyspark.core.rdd</span> <span class="kn">import</span> <span class="n">RDD</span>
<span class="kn">from</span> <span class="nn">pyspark.core.context</span> <span class="kn">import</span> <span class="n">SparkContext</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib.linalg</span> <span class="kn">import</span> <span class="n">Vector</span>
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib._typing</span> <span class="kn">import</span> <span class="n">VectorLike</span>
<span class="n">LM</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;LM&quot;</span><span class="p">)</span>
<span class="n">K</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;K&quot;</span><span class="p">)</span>
<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span>
<span class="s2">&quot;LabeledPoint&quot;</span><span class="p">,</span>
<span class="s2">&quot;LinearModel&quot;</span><span class="p">,</span>
<span class="s2">&quot;LinearRegressionModel&quot;</span><span class="p">,</span>
<span class="s2">&quot;LinearRegressionWithSGD&quot;</span><span class="p">,</span>
<span class="s2">&quot;RidgeRegressionModel&quot;</span><span class="p">,</span>
<span class="s2">&quot;RidgeRegressionWithSGD&quot;</span><span class="p">,</span>
<span class="s2">&quot;LassoModel&quot;</span><span class="p">,</span>
<span class="s2">&quot;LassoWithSGD&quot;</span><span class="p">,</span>
<span class="s2">&quot;IsotonicRegressionModel&quot;</span><span class="p">,</span>
<span class="s2">&quot;IsotonicRegression&quot;</span><span class="p">,</span>
<span class="s2">&quot;StreamingLinearAlgorithm&quot;</span><span class="p">,</span>
<span class="s2">&quot;StreamingLinearRegressionWithSGD&quot;</span><span class="p">,</span>
<span class="p">]</span>
<div class="viewcode-block" id="LabeledPoint"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.LabeledPoint.html#pyspark.mllib.regression.LabeledPoint">[docs]</a><span class="k">class</span> <span class="nc">LabeledPoint</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Class that represents the features and labels of a data point.</span>
<span class="sd"> .. versionadded:: 1.0.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> label : int</span>
<span class="sd"> Label for this data point.</span>
<span class="sd"> features : :py:class:`pyspark.mllib.linalg.Vector` or convertible</span>
<span class="sd"> Vector of features for this point (NumPy array, list,</span>
<span class="sd"> pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix).</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> &#39;label&#39; and &#39;features&#39; are accessible as class attributes.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">label</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">features</span><span class="p">:</span> <span class="n">Iterable</span><span class="p">[</span><span class="nb">float</span><span class="p">]):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">label</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">label</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">features</span> <span class="o">=</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">features</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">__reduce__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">Type</span><span class="p">[</span><span class="s2">&quot;LabeledPoint&quot;</span><span class="p">],</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="n">Vector</span><span class="p">]]:</span>
<span class="k">return</span> <span class="p">(</span><span class="n">LabeledPoint</span><span class="p">,</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">label</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">features</span><span class="p">))</span>
<span class="k">def</span> <span class="fm">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="k">return</span> <span class="s2">&quot;(&quot;</span> <span class="o">+</span> <span class="s2">&quot;,&quot;</span><span class="o">.</span><span class="n">join</span><span class="p">((</span><span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">label</span><span class="p">),</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">features</span><span class="p">)))</span> <span class="o">+</span> <span class="s2">&quot;)&quot;</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="k">return</span> <span class="s2">&quot;LabeledPoint(</span><span class="si">%s</span><span class="s2">, </span><span class="si">%s</span><span class="s2">)&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">label</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">features</span><span class="p">)</span></div>
<div class="viewcode-block" id="LinearModel"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.LinearModel.html#pyspark.mllib.regression.LinearModel">[docs]</a><span class="k">class</span> <span class="nc">LinearModel</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A linear model that has a vector of coefficients and an intercept.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> weights : :py:class:`pyspark.mllib.linalg.Vector`</span>
<span class="sd"> Weights computed for every feature.</span>
<span class="sd"> intercept : float</span>
<span class="sd"> Intercept computed for this model.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">weights</span><span class="p">:</span> <span class="n">Vector</span><span class="p">,</span> <span class="n">intercept</span><span class="p">:</span> <span class="nb">float</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_coeff</span> <span class="o">=</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">weights</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_intercept</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">intercept</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.0.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">weights</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Vector</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Weights computed for every feature.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coeff</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.0.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">intercept</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Intercept computed for this model.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_intercept</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="k">return</span> <span class="s2">&quot;(weights=</span><span class="si">%s</span><span class="s2">, intercept=</span><span class="si">%r</span><span class="s2">)&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_coeff</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_intercept</span><span class="p">)</span></div>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">LinearRegressionModelBase</span><span class="p">(</span><span class="n">LinearModel</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;A linear regression model.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.mllib.linalg import SparseVector</span>
<span class="sd"> &gt;&gt;&gt; lrmb = LinearRegressionModelBase(np.array([1.0, 2.0]), 0.1)</span>
<span class="sd"> &gt;&gt;&gt; abs(lrmb.predict(np.array([-1.03, 7.777])) - 14.624) &lt; 1e-6</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrmb.predict(SparseVector(2, {0: -1.03, 1: 7.777})) - 14.624) &lt; 1e-6</span>
<span class="sd"> True</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="s2">&quot;VectorLike&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">RDD</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span>
<span class="o">...</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">,</span> <span class="n">RDD</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="n">RDD</span><span class="p">[</span><span class="nb">float</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Predict the value of the dependent variable given a vector or</span>
<span class="sd"> an RDD of vectors containing values for the independent variables.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span>
<span class="k">return</span> <span class="n">x</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">predict</span><span class="p">)</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">weights</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">intercept</span> <span class="c1"># type: ignore[attr-defined]</span>
<div class="viewcode-block" id="LinearRegressionModel"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.LinearRegressionModel.html#pyspark.mllib.regression.LinearRegressionModel">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">LinearRegressionModel</span><span class="p">(</span><span class="n">LinearRegressionModelBase</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;A linear regression model derived from a least-squares fit.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.mllib.linalg import SparseVector</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.mllib.regression import LabeledPoint</span>
<span class="sd"> &gt;&gt;&gt; data = [</span>
<span class="sd"> ... LabeledPoint(0.0, [0.0]),</span>
<span class="sd"> ... LabeledPoint(1.0, [1.0]),</span>
<span class="sd"> ... LabeledPoint(3.0, [2.0]),</span>
<span class="sd"> ... LabeledPoint(2.0, [3.0])</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10,</span>
<span class="sd"> ... initialWeights=np.array([1.0]))</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(np.array([0.0])) - 0) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(np.array([1.0])) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(sc.parallelize([[1.0]])).collect()[0] - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; import os, tempfile</span>
<span class="sd"> &gt;&gt;&gt; path = tempfile.mkdtemp()</span>
<span class="sd"> &gt;&gt;&gt; lrm.save(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; sameModel = LinearRegressionModel.load(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; abs(sameModel.predict(np.array([0.0])) - 0) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(sameModel.predict(np.array([1.0])) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; from shutil import rmtree</span>
<span class="sd"> &gt;&gt;&gt; try:</span>
<span class="sd"> ... rmtree(path)</span>
<span class="sd"> ... except BaseException:</span>
<span class="sd"> ... pass</span>
<span class="sd"> &gt;&gt;&gt; data = [</span>
<span class="sd"> ... LabeledPoint(0.0, SparseVector(1, {0: 0.0})),</span>
<span class="sd"> ... LabeledPoint(1.0, SparseVector(1, {0: 1.0})),</span>
<span class="sd"> ... LabeledPoint(3.0, SparseVector(1, {0: 2.0})),</span>
<span class="sd"> ... LabeledPoint(2.0, SparseVector(1, {0: 3.0}))</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10,</span>
<span class="sd"> ... initialWeights=np.array([1.0]))</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(np.array([0.0])) - 0) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10, step=1.0,</span>
<span class="sd"> ... miniBatchFraction=1.0, initialWeights=np.array([1.0]), regParam=0.1, regType=&quot;l2&quot;,</span>
<span class="sd"> ... intercept=True, validateData=True)</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(np.array([0.0])) - 0) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="LinearRegressionModel.save"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.LinearRegressionModel.html#pyspark.mllib.regression.LinearRegressionModel.save">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Save a LinearRegressionModel.&quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">regression</span><span class="o">.</span><span class="n">LinearRegressionModel</span><span class="p">(</span>
<span class="n">_py2java</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coeff</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">intercept</span>
<span class="p">)</span>
<span class="n">java_model</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="LinearRegressionModel.load"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.LinearRegressionModel.html#pyspark.mllib.regression.LinearRegressionModel.load">[docs]</a> <span class="nd">@classmethod</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;LinearRegressionModel&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Load a LinearRegressionModel.&quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">regression</span><span class="o">.</span><span class="n">LinearRegressionModel</span><span class="o">.</span><span class="n">load</span><span class="p">(</span>
<span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span>
<span class="p">)</span>
<span class="n">weights</span> <span class="o">=</span> <span class="n">_java2py</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">java_model</span><span class="o">.</span><span class="n">weights</span><span class="p">())</span>
<span class="n">intercept</span> <span class="o">=</span> <span class="n">java_model</span><span class="o">.</span><span class="n">intercept</span><span class="p">()</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">LinearRegressionModel</span><span class="p">(</span><span class="n">weights</span><span class="p">,</span> <span class="n">intercept</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span></div></div>
<span class="c1"># train_func should take two parameters, namely data and initial_weights, and</span>
<span class="c1"># return the result of a call to the appropriate JVM stub.</span>
<span class="c1"># _regression_train_wrapper is responsible for setup and error checking.</span>
<span class="k">def</span> <span class="nf">_regression_train_wrapper</span><span class="p">(</span>
<span class="n">train_func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span> <span class="n">Vector</span><span class="p">],</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Any</span><span class="p">]],</span>
<span class="n">modelClass</span><span class="p">:</span> <span class="n">Type</span><span class="p">[</span><span class="n">LM</span><span class="p">],</span>
<span class="n">data</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span>
<span class="n">initial_weights</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">],</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">LM</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib.classification</span> <span class="kn">import</span> <span class="n">LogisticRegressionModel</span>
<span class="n">first</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">first</span><span class="p">,</span> <span class="n">LabeledPoint</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;data should be an RDD of LabeledPoint, but got </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">first</span><span class="p">))</span>
<span class="k">if</span> <span class="n">initial_weights</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">initial_weights</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">first</span><span class="p">()</span><span class="o">.</span><span class="n">features</span><span class="p">)</span>
<span class="k">if</span> <span class="n">modelClass</span> <span class="o">==</span> <span class="n">LogisticRegressionModel</span><span class="p">:</span>
<span class="n">weights</span><span class="p">,</span> <span class="n">intercept</span><span class="p">,</span> <span class="n">numFeatures</span><span class="p">,</span> <span class="n">numClasses</span> <span class="o">=</span> <span class="n">train_func</span><span class="p">(</span>
<span class="n">data</span><span class="p">,</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">initial_weights</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">modelClass</span><span class="p">(</span><span class="n">weights</span><span class="p">,</span> <span class="n">intercept</span><span class="p">,</span> <span class="n">numFeatures</span><span class="p">,</span> <span class="n">numClasses</span><span class="p">)</span> <span class="c1"># type: ignore[call-arg]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">weights</span><span class="p">,</span> <span class="n">intercept</span> <span class="o">=</span> <span class="n">train_func</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">initial_weights</span><span class="p">))</span>
<span class="k">return</span> <span class="n">modelClass</span><span class="p">(</span><span class="n">weights</span><span class="p">,</span> <span class="n">intercept</span><span class="p">)</span> <span class="c1"># type: ignore[call-arg]</span>
<div class="viewcode-block" id="LinearRegressionWithSGD"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.LinearRegressionWithSGD.html#pyspark.mllib.regression.LinearRegressionWithSGD">[docs]</a><span class="k">class</span> <span class="nc">LinearRegressionWithSGD</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a linear regression model using Stochastic Gradient Descent (no regularization by default).</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> .. deprecated:: 2.0.0</span>
<span class="sd"> Use :py:class:`pyspark.ml.regression.LinearRegression`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="LinearRegressionWithSGD.train"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.LinearRegressionWithSGD.html#pyspark.mllib.regression.LinearRegressionWithSGD.train">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span>
<span class="bp">cls</span><span class="p">,</span>
<span class="n">data</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span>
<span class="n">iterations</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span>
<span class="n">step</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span>
<span class="n">miniBatchFraction</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span>
<span class="n">initialWeights</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">regParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span>
<span class="n">regType</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">intercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">validateData</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">convergenceTol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.001</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">LinearRegressionModel</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a linear regression model using Stochastic Gradient</span>
<span class="sd"> Descent (SGD). This solves the least squares regression</span>
<span class="sd"> formulation</span>
<span class="sd"> f(weights) = 1/(2n) ||A weights - y||^2</span>
<span class="sd"> which is half the mean squared error. Here the data matrix has n rows,</span>
<span class="sd"> and the input RDD holds the set of rows of A, each with its</span>
<span class="sd"> corresponding right hand side label y.</span>
<span class="sd"> See also the documentation for the precise formulation.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> data : :py:class:`pyspark.RDD`</span>
<span class="sd"> The training data, an RDD of LabeledPoint.</span>
<span class="sd"> iterations : int, optional</span>
<span class="sd"> The number of iterations.</span>
<span class="sd"> (default: 100)</span>
<span class="sd"> step : float, optional</span>
<span class="sd"> The step parameter used in SGD.</span>
<span class="sd"> (default: 1.0)</span>
<span class="sd"> miniBatchFraction : float, optional</span>
<span class="sd"> Fraction of data to be used for each SGD iteration.</span>
<span class="sd"> (default: 1.0)</span>
<span class="sd"> initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional</span>
<span class="sd"> The initial weights.</span>
<span class="sd"> (default: None)</span>
<span class="sd"> regParam : float, optional</span>
<span class="sd"> The regularizer parameter.</span>
<span class="sd"> (default: 0.0)</span>
<span class="sd"> regType : str, optional</span>
<span class="sd"> The type of regularizer used for training our model.</span>
<span class="sd"> Supported values:</span>
<span class="sd"> - &quot;l1&quot; for using L1 regularization</span>
<span class="sd"> - &quot;l2&quot; for using L2 regularization</span>
<span class="sd"> - None for no regularization (default)</span>
<span class="sd"> intercept : bool, optional</span>
<span class="sd"> Boolean parameter which indicates the use or not of the</span>
<span class="sd"> augmented representation for training data (i.e., whether bias</span>
<span class="sd"> features are activated or not).</span>
<span class="sd"> (default: False)</span>
<span class="sd"> validateData : bool, optional</span>
<span class="sd"> Boolean parameter which indicates if the algorithm should</span>
<span class="sd"> validate data before training.</span>
<span class="sd"> (default: True)</span>
<span class="sd"> convergenceTol : float, optional</span>
<span class="sd"> A condition which decides iteration termination.</span>
<span class="sd"> (default: 0.001)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span><span class="s2">&quot;Deprecated in 2.0.0. Use ml.regression.LinearRegression.&quot;</span><span class="p">,</span> <span class="ne">FutureWarning</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span><span class="n">rdd</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span> <span class="n">i</span><span class="p">:</span> <span class="n">Vector</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Any</span><span class="p">]:</span>
<span class="k">return</span> <span class="n">callMLlibFunc</span><span class="p">(</span>
<span class="s2">&quot;trainLinearRegressionModelWithSGD&quot;</span><span class="p">,</span>
<span class="n">rdd</span><span class="p">,</span>
<span class="nb">int</span><span class="p">(</span><span class="n">iterations</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">step</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">miniBatchFraction</span><span class="p">),</span>
<span class="n">i</span><span class="p">,</span>
<span class="nb">float</span><span class="p">(</span><span class="n">regParam</span><span class="p">),</span>
<span class="n">regType</span><span class="p">,</span>
<span class="nb">bool</span><span class="p">(</span><span class="n">intercept</span><span class="p">),</span>
<span class="nb">bool</span><span class="p">(</span><span class="n">validateData</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">convergenceTol</span><span class="p">),</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">_regression_train_wrapper</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">LinearRegressionModel</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">initialWeights</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="LassoModel"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.LassoModel.html#pyspark.mllib.regression.LassoModel">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">LassoModel</span><span class="p">(</span><span class="n">LinearRegressionModelBase</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;A linear regression model derived from a least-squares fit with</span>
<span class="sd"> an l_1 penalty term.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.mllib.linalg import SparseVector</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.mllib.regression import LabeledPoint</span>
<span class="sd"> &gt;&gt;&gt; data = [</span>
<span class="sd"> ... LabeledPoint(0.0, [0.0]),</span>
<span class="sd"> ... LabeledPoint(1.0, [1.0]),</span>
<span class="sd"> ... LabeledPoint(3.0, [2.0]),</span>
<span class="sd"> ... LabeledPoint(2.0, [3.0])</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; lrm = LassoWithSGD.train(</span>
<span class="sd"> ... sc.parallelize(data), iterations=10, initialWeights=np.array([1.0]))</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(np.array([0.0])) - 0) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(np.array([1.0])) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(sc.parallelize([[1.0]])).collect()[0] - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; import os, tempfile</span>
<span class="sd"> &gt;&gt;&gt; path = tempfile.mkdtemp()</span>
<span class="sd"> &gt;&gt;&gt; lrm.save(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; sameModel = LassoModel.load(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; abs(sameModel.predict(np.array([0.0])) - 0) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(sameModel.predict(np.array([1.0])) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; from shutil import rmtree</span>
<span class="sd"> &gt;&gt;&gt; try:</span>
<span class="sd"> ... rmtree(path)</span>
<span class="sd"> ... except BaseException:</span>
<span class="sd"> ... pass</span>
<span class="sd"> &gt;&gt;&gt; data = [</span>
<span class="sd"> ... LabeledPoint(0.0, SparseVector(1, {0: 0.0})),</span>
<span class="sd"> ... LabeledPoint(1.0, SparseVector(1, {0: 1.0})),</span>
<span class="sd"> ... LabeledPoint(3.0, SparseVector(1, {0: 2.0})),</span>
<span class="sd"> ... LabeledPoint(2.0, SparseVector(1, {0: 3.0}))</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; lrm = LassoWithSGD.train(sc.parallelize(data), iterations=10,</span>
<span class="sd"> ... initialWeights=np.array([1.0]))</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(np.array([0.0])) - 0) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; lrm = LassoWithSGD.train(sc.parallelize(data), iterations=10, step=1.0,</span>
<span class="sd"> ... regParam=0.01, miniBatchFraction=1.0, initialWeights=np.array([1.0]), intercept=True,</span>
<span class="sd"> ... validateData=True)</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(np.array([0.0])) - 0) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="LassoModel.save"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.LassoModel.html#pyspark.mllib.regression.LassoModel.save">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Save a LassoModel.&quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">regression</span><span class="o">.</span><span class="n">LassoModel</span><span class="p">(</span>
<span class="n">_py2java</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coeff</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">intercept</span>
<span class="p">)</span>
<span class="n">java_model</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="LassoModel.load"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.LassoModel.html#pyspark.mllib.regression.LassoModel.load">[docs]</a> <span class="nd">@classmethod</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;LassoModel&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Load a LassoModel.&quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">regression</span><span class="o">.</span><span class="n">LassoModel</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span><span class="p">)</span>
<span class="n">weights</span> <span class="o">=</span> <span class="n">_java2py</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">java_model</span><span class="o">.</span><span class="n">weights</span><span class="p">())</span>
<span class="n">intercept</span> <span class="o">=</span> <span class="n">java_model</span><span class="o">.</span><span class="n">intercept</span><span class="p">()</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">LassoModel</span><span class="p">(</span><span class="n">weights</span><span class="p">,</span> <span class="n">intercept</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span></div></div>
<div class="viewcode-block" id="LassoWithSGD"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.LassoWithSGD.html#pyspark.mllib.regression.LassoWithSGD">[docs]</a><span class="k">class</span> <span class="nc">LassoWithSGD</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a regression model with L1-regularization using Stochastic Gradient Descent.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> .. deprecated:: 2.0.0</span>
<span class="sd"> Use :py:class:`pyspark.ml.regression.LinearRegression` with elasticNetParam = 1.0.</span>
<span class="sd"> Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="LassoWithSGD.train"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.LassoWithSGD.html#pyspark.mllib.regression.LassoWithSGD.train">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span>
<span class="bp">cls</span><span class="p">,</span>
<span class="n">data</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span>
<span class="n">iterations</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span>
<span class="n">step</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span>
<span class="n">regParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.01</span><span class="p">,</span>
<span class="n">miniBatchFraction</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span>
<span class="n">initialWeights</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">intercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">validateData</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">convergenceTol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.001</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">LassoModel</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a regression model with L1-regularization using Stochastic</span>
<span class="sd"> Gradient Descent. This solves the l1-regularized least squares</span>
<span class="sd"> regression formulation</span>
<span class="sd"> f(weights) = 1/(2n) ||A weights - y||^2 + regParam ||weights||_1</span>
<span class="sd"> Here the data matrix has n rows, and the input RDD holds the set</span>
<span class="sd"> of rows of A, each with its corresponding right hand side label y.</span>
<span class="sd"> See also the documentation for the precise formulation.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> data : :py:class:`pyspark.RDD`</span>
<span class="sd"> The training data, an RDD of LabeledPoint.</span>
<span class="sd"> iterations : int, optional</span>
<span class="sd"> The number of iterations.</span>
<span class="sd"> (default: 100)</span>
<span class="sd"> step : float, optional</span>
<span class="sd"> The step parameter used in SGD.</span>
<span class="sd"> (default: 1.0)</span>
<span class="sd"> regParam : float, optional</span>
<span class="sd"> The regularizer parameter.</span>
<span class="sd"> (default: 0.01)</span>
<span class="sd"> miniBatchFraction : float, optional</span>
<span class="sd"> Fraction of data to be used for each SGD iteration.</span>
<span class="sd"> (default: 1.0)</span>
<span class="sd"> initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional</span>
<span class="sd"> The initial weights.</span>
<span class="sd"> (default: None)</span>
<span class="sd"> intercept : bool, optional</span>
<span class="sd"> Boolean parameter which indicates the use or not of the</span>
<span class="sd"> augmented representation for training data (i.e. whether bias</span>
<span class="sd"> features are activated or not).</span>
<span class="sd"> (default: False)</span>
<span class="sd"> validateData : bool, optional</span>
<span class="sd"> Boolean parameter which indicates if the algorithm should</span>
<span class="sd"> validate data before training.</span>
<span class="sd"> (default: True)</span>
<span class="sd"> convergenceTol : float, optional</span>
<span class="sd"> A condition which decides iteration termination.</span>
<span class="sd"> (default: 0.001)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. &quot;</span>
<span class="s2">&quot;Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span><span class="n">rdd</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span> <span class="n">i</span><span class="p">:</span> <span class="n">Vector</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Any</span><span class="p">]:</span>
<span class="k">return</span> <span class="n">callMLlibFunc</span><span class="p">(</span>
<span class="s2">&quot;trainLassoModelWithSGD&quot;</span><span class="p">,</span>
<span class="n">rdd</span><span class="p">,</span>
<span class="nb">int</span><span class="p">(</span><span class="n">iterations</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">step</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">regParam</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">miniBatchFraction</span><span class="p">),</span>
<span class="n">i</span><span class="p">,</span>
<span class="nb">bool</span><span class="p">(</span><span class="n">intercept</span><span class="p">),</span>
<span class="nb">bool</span><span class="p">(</span><span class="n">validateData</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">convergenceTol</span><span class="p">),</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">_regression_train_wrapper</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">LassoModel</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">initialWeights</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="RidgeRegressionModel"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.RidgeRegressionModel.html#pyspark.mllib.regression.RidgeRegressionModel">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">RidgeRegressionModel</span><span class="p">(</span><span class="n">LinearRegressionModelBase</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;A linear regression model derived from a least-squares fit with</span>
<span class="sd"> an l_2 penalty term.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.mllib.linalg import SparseVector</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.mllib.regression import LabeledPoint</span>
<span class="sd"> &gt;&gt;&gt; data = [</span>
<span class="sd"> ... LabeledPoint(0.0, [0.0]),</span>
<span class="sd"> ... LabeledPoint(1.0, [1.0]),</span>
<span class="sd"> ... LabeledPoint(3.0, [2.0]),</span>
<span class="sd"> ... LabeledPoint(2.0, [3.0])</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; lrm = RidgeRegressionWithSGD.train(sc.parallelize(data), iterations=10,</span>
<span class="sd"> ... initialWeights=np.array([1.0]))</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(np.array([0.0])) - 0) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(np.array([1.0])) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(sc.parallelize([[1.0]])).collect()[0] - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; import os, tempfile</span>
<span class="sd"> &gt;&gt;&gt; path = tempfile.mkdtemp()</span>
<span class="sd"> &gt;&gt;&gt; lrm.save(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; sameModel = RidgeRegressionModel.load(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; abs(sameModel.predict(np.array([0.0])) - 0) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(sameModel.predict(np.array([1.0])) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(sameModel.predict(SparseVector(1, {0: 1.0})) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; from shutil import rmtree</span>
<span class="sd"> &gt;&gt;&gt; try:</span>
<span class="sd"> ... rmtree(path)</span>
<span class="sd"> ... except BaseException:</span>
<span class="sd"> ... pass</span>
<span class="sd"> &gt;&gt;&gt; data = [</span>
<span class="sd"> ... LabeledPoint(0.0, SparseVector(1, {0: 0.0})),</span>
<span class="sd"> ... LabeledPoint(1.0, SparseVector(1, {0: 1.0})),</span>
<span class="sd"> ... LabeledPoint(3.0, SparseVector(1, {0: 2.0})),</span>
<span class="sd"> ... LabeledPoint(2.0, SparseVector(1, {0: 3.0}))</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; lrm = LinearRegressionWithSGD.train(sc.parallelize(data), iterations=10,</span>
<span class="sd"> ... initialWeights=np.array([1.0]))</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(np.array([0.0])) - 0) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; lrm = RidgeRegressionWithSGD.train(sc.parallelize(data), iterations=10, step=1.0,</span>
<span class="sd"> ... regParam=0.01, miniBatchFraction=1.0, initialWeights=np.array([1.0]), intercept=True,</span>
<span class="sd"> ... validateData=True)</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(np.array([0.0])) - 0) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) &lt; 0.5</span>
<span class="sd"> True</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="RidgeRegressionModel.save"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.RidgeRegressionModel.html#pyspark.mllib.regression.RidgeRegressionModel.save">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Save a RidgeRegressionModel.&quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">regression</span><span class="o">.</span><span class="n">RidgeRegressionModel</span><span class="p">(</span>
<span class="n">_py2java</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coeff</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">intercept</span>
<span class="p">)</span>
<span class="n">java_model</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="RidgeRegressionModel.load"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.RidgeRegressionModel.html#pyspark.mllib.regression.RidgeRegressionModel.load">[docs]</a> <span class="nd">@classmethod</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;RidgeRegressionModel&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Load a RidgeRegressionModel.&quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">regression</span><span class="o">.</span><span class="n">RidgeRegressionModel</span><span class="o">.</span><span class="n">load</span><span class="p">(</span>
<span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span>
<span class="p">)</span>
<span class="n">weights</span> <span class="o">=</span> <span class="n">_java2py</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">java_model</span><span class="o">.</span><span class="n">weights</span><span class="p">())</span>
<span class="n">intercept</span> <span class="o">=</span> <span class="n">java_model</span><span class="o">.</span><span class="n">intercept</span><span class="p">()</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">RidgeRegressionModel</span><span class="p">(</span><span class="n">weights</span><span class="p">,</span> <span class="n">intercept</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span></div></div>
<div class="viewcode-block" id="RidgeRegressionWithSGD"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.RidgeRegressionWithSGD.html#pyspark.mllib.regression.RidgeRegressionWithSGD">[docs]</a><span class="k">class</span> <span class="nc">RidgeRegressionWithSGD</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a regression model with L2-regularization using Stochastic Gradient Descent.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> .. deprecated:: 2.0.0</span>
<span class="sd"> Use :py:class:`pyspark.ml.regression.LinearRegression` with elasticNetParam = 0.0.</span>
<span class="sd"> Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for</span>
<span class="sd"> LinearRegression.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="RidgeRegressionWithSGD.train"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.RidgeRegressionWithSGD.html#pyspark.mllib.regression.RidgeRegressionWithSGD.train">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span>
<span class="bp">cls</span><span class="p">,</span>
<span class="n">data</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span>
<span class="n">iterations</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span>
<span class="n">step</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span>
<span class="n">regParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.01</span><span class="p">,</span>
<span class="n">miniBatchFraction</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span>
<span class="n">initialWeights</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">intercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">validateData</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">convergenceTol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.001</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">RidgeRegressionModel</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a regression model with L2-regularization using Stochastic</span>
<span class="sd"> Gradient Descent. This solves the l2-regularized least squares</span>
<span class="sd"> regression formulation</span>
<span class="sd"> f(weights) = 1/(2n) ||A weights - y||^2 + regParam/2 ||weights||^2</span>
<span class="sd"> Here the data matrix has n rows, and the input RDD holds the set</span>
<span class="sd"> of rows of A, each with its corresponding right hand side label y.</span>
<span class="sd"> See also the documentation for the precise formulation.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> data : :py:class:`pyspark.RDD`</span>
<span class="sd"> The training data, an RDD of LabeledPoint.</span>
<span class="sd"> iterations : int, optional</span>
<span class="sd"> The number of iterations.</span>
<span class="sd"> (default: 100)</span>
<span class="sd"> step : float, optional</span>
<span class="sd"> The step parameter used in SGD.</span>
<span class="sd"> (default: 1.0)</span>
<span class="sd"> regParam : float, optional</span>
<span class="sd"> The regularizer parameter.</span>
<span class="sd"> (default: 0.01)</span>
<span class="sd"> miniBatchFraction : float, optional</span>
<span class="sd"> Fraction of data to be used for each SGD iteration.</span>
<span class="sd"> (default: 1.0)</span>
<span class="sd"> initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional</span>
<span class="sd"> The initial weights.</span>
<span class="sd"> (default: None)</span>
<span class="sd"> intercept : bool, optional</span>
<span class="sd"> Boolean parameter which indicates the use or not of the</span>
<span class="sd"> augmented representation for training data (i.e. whether bias</span>
<span class="sd"> features are activated or not).</span>
<span class="sd"> (default: False)</span>
<span class="sd"> validateData : bool, optional</span>
<span class="sd"> Boolean parameter which indicates if the algorithm should</span>
<span class="sd"> validate data before training.</span>
<span class="sd"> (default: True)</span>
<span class="sd"> convergenceTol : float, optional</span>
<span class="sd"> A condition which decides iteration termination.</span>
<span class="sd"> (default: 0.001)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. &quot;</span>
<span class="s2">&quot;Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for &quot;</span>
<span class="s2">&quot;LinearRegression.&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span><span class="n">rdd</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span> <span class="n">i</span><span class="p">:</span> <span class="n">Vector</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Any</span><span class="p">]:</span>
<span class="k">return</span> <span class="n">callMLlibFunc</span><span class="p">(</span>
<span class="s2">&quot;trainRidgeModelWithSGD&quot;</span><span class="p">,</span>
<span class="n">rdd</span><span class="p">,</span>
<span class="nb">int</span><span class="p">(</span><span class="n">iterations</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">step</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">regParam</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">miniBatchFraction</span><span class="p">),</span>
<span class="n">i</span><span class="p">,</span>
<span class="nb">bool</span><span class="p">(</span><span class="n">intercept</span><span class="p">),</span>
<span class="nb">bool</span><span class="p">(</span><span class="n">validateData</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">convergenceTol</span><span class="p">),</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">_regression_train_wrapper</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">RidgeRegressionModel</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">initialWeights</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="IsotonicRegressionModel"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.IsotonicRegressionModel.html#pyspark.mllib.regression.IsotonicRegressionModel">[docs]</a><span class="k">class</span> <span class="nc">IsotonicRegressionModel</span><span class="p">(</span><span class="n">Saveable</span><span class="p">,</span> <span class="n">Loader</span><span class="p">[</span><span class="s2">&quot;IsotonicRegressionModel&quot;</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Regression model for isotonic regression.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> boundaries : ndarray</span>
<span class="sd"> Array of boundaries for which predictions are known. Boundaries</span>
<span class="sd"> must be sorted in increasing order.</span>
<span class="sd"> predictions : ndarray</span>
<span class="sd"> Array of predictions associated to the boundaries at the same</span>
<span class="sd"> index. Results of isotonic regression and therefore monotone.</span>
<span class="sd"> isotonic : bool</span>
<span class="sd"> Indicates whether this is isotonic or antitonic.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; data = [(1, 0, 1), (2, 1, 1), (3, 2, 1), (1, 3, 1), (6, 4, 1), (17, 5, 1), (16, 6, 1)]</span>
<span class="sd"> &gt;&gt;&gt; irm = IsotonicRegression.train(sc.parallelize(data))</span>
<span class="sd"> &gt;&gt;&gt; irm.predict(3)</span>
<span class="sd"> 2.0</span>
<span class="sd"> &gt;&gt;&gt; irm.predict(5)</span>
<span class="sd"> 16.5</span>
<span class="sd"> &gt;&gt;&gt; irm.predict(sc.parallelize([3, 5])).collect()</span>
<span class="sd"> [2.0, 16.5]</span>
<span class="sd"> &gt;&gt;&gt; import os, tempfile</span>
<span class="sd"> &gt;&gt;&gt; path = tempfile.mkdtemp()</span>
<span class="sd"> &gt;&gt;&gt; irm.save(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; sameModel = IsotonicRegressionModel.load(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; sameModel.predict(3)</span>
<span class="sd"> 2.0</span>
<span class="sd"> &gt;&gt;&gt; sameModel.predict(5)</span>
<span class="sd"> 16.5</span>
<span class="sd"> &gt;&gt;&gt; from shutil import rmtree</span>
<span class="sd"> &gt;&gt;&gt; try:</span>
<span class="sd"> ... rmtree(path)</span>
<span class="sd"> ... except OSError:</span>
<span class="sd"> ... pass</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">boundaries</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="n">predictions</span><span class="p">:</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="n">isotonic</span><span class="p">:</span> <span class="nb">bool</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">boundaries</span> <span class="o">=</span> <span class="n">boundaries</span>
<span class="bp">self</span><span class="o">.</span><span class="n">predictions</span> <span class="o">=</span> <span class="n">predictions</span>
<span class="bp">self</span><span class="o">.</span><span class="n">isotonic</span> <span class="o">=</span> <span class="n">isotonic</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">np</span><span class="o">.</span><span class="n">float64</span><span class="p">:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="s2">&quot;VectorLike&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="nb">float</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">RDD</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">float64</span><span class="p">]:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">RDD</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">]:</span>
<span class="o">...</span>
<div class="viewcode-block" id="IsotonicRegressionModel.predict"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.IsotonicRegressionModel.html#pyspark.mllib.regression.IsotonicRegressionModel.predict">[docs]</a> <span class="k">def</span> <span class="nf">predict</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="s2">&quot;VectorLike&quot;</span><span class="p">,</span> <span class="n">RDD</span><span class="p">[</span><span class="nb">float</span><span class="p">],</span> <span class="n">RDD</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">]]</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">float64</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="n">RDD</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">float64</span><span class="p">],</span> <span class="n">RDD</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">ndarray</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Predict labels for provided features.</span>
<span class="sd"> Using a piecewise linear function.</span>
<span class="sd"> 1) If x exactly matches a boundary then associated prediction</span>
<span class="sd"> is returned. In case there are multiple predictions with the</span>
<span class="sd"> same boundary then one of them is returned. Which one is</span>
<span class="sd"> undefined (same as java.util.Arrays.binarySearch).</span>
<span class="sd"> 2) If x is lower or higher than all boundaries then first or</span>
<span class="sd"> last prediction is returned respectively. In case there are</span>
<span class="sd"> multiple predictions with the same boundary then the lowest</span>
<span class="sd"> or highest is returned respectively.</span>
<span class="sd"> 3) If x falls between two values in boundary array then</span>
<span class="sd"> prediction is treated as piecewise linear function and</span>
<span class="sd"> interpolated value is returned. In case there are multiple</span>
<span class="sd"> values with the same boundary then the same rules as in 2)</span>
<span class="sd"> are used.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD`</span>
<span class="sd"> Feature or RDD of Features to be labeled.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span>
<span class="k">return</span> <span class="n">x</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">v</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">v</span><span class="p">))</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">interp</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">boundaries</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">predictions</span><span class="p">)</span> <span class="c1"># type: ignore[arg-type]</span></div>
<div class="viewcode-block" id="IsotonicRegressionModel.save"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.IsotonicRegressionModel.html#pyspark.mllib.regression.IsotonicRegressionModel.save">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Save an IsotonicRegressionModel.&quot;&quot;&quot;</span>
<span class="n">java_boundaries</span> <span class="o">=</span> <span class="n">_py2java</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">boundaries</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
<span class="n">java_predictions</span> <span class="o">=</span> <span class="n">_py2java</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">predictions</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">regression</span><span class="o">.</span><span class="n">IsotonicRegressionModel</span><span class="p">(</span>
<span class="n">java_boundaries</span><span class="p">,</span> <span class="n">java_predictions</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">isotonic</span>
<span class="p">)</span>
<span class="n">java_model</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="IsotonicRegressionModel.load"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.IsotonicRegressionModel.html#pyspark.mllib.regression.IsotonicRegressionModel.load">[docs]</a> <span class="nd">@classmethod</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;IsotonicRegressionModel&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Load an IsotonicRegressionModel.&quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">regression</span><span class="o">.</span><span class="n">IsotonicRegressionModel</span><span class="o">.</span><span class="n">load</span><span class="p">(</span>
<span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span>
<span class="p">)</span>
<span class="n">py_boundaries</span> <span class="o">=</span> <span class="n">_java2py</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">java_model</span><span class="o">.</span><span class="n">boundaryVector</span><span class="p">())</span><span class="o">.</span><span class="n">toArray</span><span class="p">()</span>
<span class="n">py_predictions</span> <span class="o">=</span> <span class="n">_java2py</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">java_model</span><span class="o">.</span><span class="n">predictionVector</span><span class="p">())</span><span class="o">.</span><span class="n">toArray</span><span class="p">()</span>
<span class="k">return</span> <span class="n">IsotonicRegressionModel</span><span class="p">(</span><span class="n">py_boundaries</span><span class="p">,</span> <span class="n">py_predictions</span><span class="p">,</span> <span class="n">java_model</span><span class="o">.</span><span class="n">isotonic</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="IsotonicRegression"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.IsotonicRegression.html#pyspark.mllib.regression.IsotonicRegression">[docs]</a><span class="k">class</span> <span class="nc">IsotonicRegression</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Isotonic regression.</span>
<span class="sd"> Currently implemented using parallelized pool adjacent violators</span>
<span class="sd"> algorithm. Only univariate (single feature) algorithm supported.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Sequential PAV implementation based on</span>
<span class="sd"> Tibshirani, Ryan J., Holger Hoefling, and Robert Tibshirani (2011) [1]_</span>
<span class="sd"> Sequential PAV parallelization based on</span>
<span class="sd"> Kearsley, Anthony J., Richard A. Tapia, and Michael W. Trosset (1996) [2]_</span>
<span class="sd"> See also</span>
<span class="sd"> `Isotonic regression (Wikipedia) &lt;http://en.wikipedia.org/wiki/Isotonic_regression&gt;`_.</span>
<span class="sd"> .. [1] Tibshirani, Ryan J., Holger Hoefling, and Robert Tibshirani.</span>
<span class="sd"> &quot;Nearly-isotonic regression.&quot; Technometrics 53.1 (2011): 54-61.</span>
<span class="sd"> Available from http://www.stat.cmu.edu/~ryantibs/papers/neariso.pdf</span>
<span class="sd"> .. [2] Kearsley, Anthony J., Richard A. Tapia, and Michael W. Trosset</span>
<span class="sd"> &quot;An approach to parallelizing isotonic regression.&quot;</span>
<span class="sd"> Applied Mathematics and Parallel Computing. Physica-Verlag HD, 1996. 141-147.</span>
<span class="sd"> Available from http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="IsotonicRegression.train"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.IsotonicRegression.html#pyspark.mllib.regression.IsotonicRegression.train">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">],</span> <span class="n">isotonic</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">IsotonicRegressionModel</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train an isotonic regression model on the given data.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> data : :py:class:`pyspark.RDD`</span>
<span class="sd"> RDD of (label, feature, weight) tuples.</span>
<span class="sd"> isotonic : bool, optional</span>
<span class="sd"> Whether this is isotonic (which is default) or antitonic.</span>
<span class="sd"> (default: True)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">boundaries</span><span class="p">,</span> <span class="n">predictions</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span>
<span class="s2">&quot;trainIsotonicRegressionModel&quot;</span><span class="p">,</span> <span class="n">data</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">_convert_to_vector</span><span class="p">),</span> <span class="nb">bool</span><span class="p">(</span><span class="n">isotonic</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">IsotonicRegressionModel</span><span class="p">(</span><span class="n">boundaries</span><span class="o">.</span><span class="n">toArray</span><span class="p">(),</span> <span class="n">predictions</span><span class="o">.</span><span class="n">toArray</span><span class="p">(),</span> <span class="n">isotonic</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="StreamingLinearAlgorithm"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.StreamingLinearAlgorithm.html#pyspark.mllib.regression.StreamingLinearAlgorithm">[docs]</a><span class="k">class</span> <span class="nc">StreamingLinearAlgorithm</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Base class that has to be inherited by any StreamingLinearAlgorithm.</span>
<span class="sd"> Prevents reimplementation of methods predictOn and predictOnValues.</span>
<span class="sd"> .. versionadded:: 1.5.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">model</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">LinearModel</span><span class="p">]):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_model</span> <span class="o">=</span> <span class="n">model</span>
<div class="viewcode-block" id="StreamingLinearAlgorithm.latestModel"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.StreamingLinearAlgorithm.html#pyspark.mllib.regression.StreamingLinearAlgorithm.latestModel">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.5.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">latestModel</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="n">LinearModel</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the latest model.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_model</span></div>
<span class="k">def</span> <span class="nf">_validate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dstream</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">dstream</span><span class="p">,</span> <span class="n">DStream</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;dstream should be a DStream object, got </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">dstream</span><span class="p">))</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">_model</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Model must be initialized using setInitialWeights&quot;</span><span class="p">)</span>
<div class="viewcode-block" id="StreamingLinearAlgorithm.predictOn"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.StreamingLinearAlgorithm.html#pyspark.mllib.regression.StreamingLinearAlgorithm.predictOn">[docs]</a> <span class="k">def</span> <span class="nf">predictOn</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dstream</span><span class="p">:</span> <span class="s2">&quot;DStream[VectorLike]&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DStream[float]&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Use the model to make predictions on batches of data from a</span>
<span class="sd"> DStream.</span>
<span class="sd"> .. versionadded:: 1.5.0</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> :py:class:`pyspark.streaming.DStream`</span>
<span class="sd"> DStream containing predictions.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_validate</span><span class="p">(</span><span class="n">dstream</span><span class="p">)</span>
<span class="k">return</span> <span class="n">dstream</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_model</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> <span class="c1"># type: ignore[union-attr]</span></div>
<div class="viewcode-block" id="StreamingLinearAlgorithm.predictOnValues"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.StreamingLinearAlgorithm.html#pyspark.mllib.regression.StreamingLinearAlgorithm.predictOnValues">[docs]</a> <span class="k">def</span> <span class="nf">predictOnValues</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">dstream</span><span class="p">:</span> <span class="s2">&quot;DStream[Tuple[K, VectorLike]]&quot;</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DStream[Tuple[K, float]]&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Use the model to make predictions on the values of a DStream and</span>
<span class="sd"> carry over its keys.</span>
<span class="sd"> .. versionadded:: 1.5.0</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> :py:class:`pyspark.streaming.DStream`</span>
<span class="sd"> DStream containing predictions.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_validate</span><span class="p">(</span><span class="n">dstream</span><span class="p">)</span>
<span class="k">return</span> <span class="n">dstream</span><span class="o">.</span><span class="n">mapValues</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">_model</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">x</span><span class="p">))</span> <span class="c1"># type: ignore[union-attr]</span></div></div>
<div class="viewcode-block" id="StreamingLinearRegressionWithSGD"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.StreamingLinearRegressionWithSGD.html#pyspark.mllib.regression.StreamingLinearRegressionWithSGD">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">StreamingLinearRegressionWithSGD</span><span class="p">(</span><span class="n">StreamingLinearAlgorithm</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train or predict a linear regression model on streaming data.</span>
<span class="sd"> Training uses Stochastic Gradient Descent to update the model</span>
<span class="sd"> based on each new batch of incoming data from a DStream</span>
<span class="sd"> (see `LinearRegressionWithSGD` for model equation).</span>
<span class="sd"> Each batch of data is assumed to be an RDD of LabeledPoints.</span>
<span class="sd"> The number of data points per batch can vary, but the number</span>
<span class="sd"> of features must be constant. An initial weight vector must</span>
<span class="sd"> be provided.</span>
<span class="sd"> .. versionadded:: 1.5.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> stepSize : float, optional</span>
<span class="sd"> Step size for each iteration of gradient descent.</span>
<span class="sd"> (default: 0.1)</span>
<span class="sd"> numIterations : int, optional</span>
<span class="sd"> Number of iterations run for each batch of data.</span>
<span class="sd"> (default: 50)</span>
<span class="sd"> miniBatchFraction : float, optional</span>
<span class="sd"> Fraction of each batch of data to use for updates.</span>
<span class="sd"> (default: 1.0)</span>
<span class="sd"> convergenceTol : float, optional</span>
<span class="sd"> Value used to determine when to terminate iterations.</span>
<span class="sd"> (default: 0.001)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">stepSize</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.1</span><span class="p">,</span>
<span class="n">numIterations</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">50</span><span class="p">,</span>
<span class="n">miniBatchFraction</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span>
<span class="n">convergenceTol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.001</span><span class="p">,</span>
<span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">stepSize</span> <span class="o">=</span> <span class="n">stepSize</span>
<span class="bp">self</span><span class="o">.</span><span class="n">numIterations</span> <span class="o">=</span> <span class="n">numIterations</span>
<span class="bp">self</span><span class="o">.</span><span class="n">miniBatchFraction</span> <span class="o">=</span> <span class="n">miniBatchFraction</span>
<span class="bp">self</span><span class="o">.</span><span class="n">convergenceTol</span> <span class="o">=</span> <span class="n">convergenceTol</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_model</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">LinearModel</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="nb">super</span><span class="p">(</span><span class="n">StreamingLinearRegressionWithSGD</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">model</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_model</span><span class="p">)</span>
<div class="viewcode-block" id="StreamingLinearRegressionWithSGD.setInitialWeights"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.StreamingLinearRegressionWithSGD.html#pyspark.mllib.regression.StreamingLinearRegressionWithSGD.setInitialWeights">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.5.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">setInitialWeights</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">initialWeights</span><span class="p">:</span> <span class="s2">&quot;VectorLike&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;StreamingLinearRegressionWithSGD&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Set the initial value of weights.</span>
<span class="sd"> This must be set before running trainOn and predictOn</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">initialWeights</span> <span class="o">=</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">initialWeights</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_model</span> <span class="o">=</span> <span class="n">LinearRegressionModel</span><span class="p">(</span><span class="n">initialWeights</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="StreamingLinearRegressionWithSGD.trainOn"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.regression.StreamingLinearRegressionWithSGD.html#pyspark.mllib.regression.StreamingLinearRegressionWithSGD.trainOn">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.5.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">trainOn</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dstream</span><span class="p">:</span> <span class="s2">&quot;DStream[LabeledPoint]&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Train the model on the incoming dstream.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_validate</span><span class="p">(</span><span class="n">dstream</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">update</span><span class="p">(</span><span class="n">rdd</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># LinearRegressionWithSGD.train raises an error for an empty RDD.</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">rdd</span><span class="o">.</span><span class="n">isEmpty</span><span class="p">():</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_model</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_model</span> <span class="o">=</span> <span class="n">LinearRegressionWithSGD</span><span class="o">.</span><span class="n">train</span><span class="p">(</span>
<span class="n">rdd</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">numIterations</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">stepSize</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">miniBatchFraction</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_model</span><span class="o">.</span><span class="n">weights</span><span class="p">,</span>
<span class="n">intercept</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_model</span><span class="o">.</span><span class="n">intercept</span><span class="p">,</span> <span class="c1"># type: ignore[arg-type]</span>
<span class="n">convergenceTol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">convergenceTol</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">dstream</span><span class="o">.</span><span class="n">foreachRDD</span><span class="p">(</span><span class="n">update</span><span class="p">)</span></div></div>
<span class="k">def</span> <span class="nf">_test</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">doctest</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span>
<span class="kn">import</span> <span class="nn">pyspark.mllib.regression</span>
<span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">regression</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">&quot;local[2]&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">&quot;mllib.regression tests&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span>
<span class="n">globs</span><span class="p">[</span><span class="s2">&quot;sc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">sparkContext</span>
<span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span><span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span> <span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span><span class="p">)</span>
<span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span>
<span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span>
<span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="n">_test</span><span class="p">()</span>
</pre></div>
</article>
<!-- Article footer: holds the previous / next navigation area, which has no links on this page. -->
<footer class="bd-footer-article">
  <div class="footer-article-items footer-article__inner">
    <div class="footer-article-item">
      <!-- Previous / next buttons -->
      <div class="prev-next-area">
      </div>
    </div>
  </div>
</footer>
</div>
</div>
<!-- Content footer landmark: no items are rendered inside it on this page. -->
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts are loaded after the main content so they do not block parsing of the page body -->
<script src="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52"></script>
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52"></script>
<!-- Site-wide footer: copyright/license notice and Sphinx version at the start, theme version at the end. -->
<footer class="bd-footer">
  <div class="bd-footer__inner bd-page-width">
    <div class="footer-items__start">
      <div class="footer-item">
        <p class="copyright">
          Copyright © 2024 The Apache Software Foundation, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
        </p>
      </div>
      <div class="footer-item">
        <p class="sphinx-version">
          Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.5.0.
          <br/>
        </p>
      </div>
    </div>
    <div class="footer-items__end">
      <div class="footer-item">
        <p class="theme-version">
          Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.13.3.
        </p>
      </div>
    </div>
  </div>
</footer>
</body>
</html>