blob: d09deaaffa01432b0962342272cf12adac020f0e [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>pyspark.mllib.classification &#8212; PySpark 4.0.0-preview1 documentation</title>
<script data-cfasync="false">
{
  // Copy the persisted "mode" and "theme" values from localStorage onto the
  // root element's data attributes, falling back to "" and "light".
  const rootDataset = document.documentElement.dataset;
  rootDataset.mode = localStorage.getItem("mode") || "";
  rootDataset.theme = localStorage.getItem("theme") || "light";
}
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../../../_static/styles/theme.css?digest=e353d410970836974a52" rel="stylesheet" />
<link href="../../../_static/styles/bootstrap.css?digest=e353d410970836974a52" rel="stylesheet" />
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=e353d410970836974a52" rel="stylesheet" />
<link href="../../../_static/vendor/fontawesome/6.1.2/css/all.min.css?digest=e353d410970836974a52" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52" />
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
<script src="../../../_static/jquery.js"></script>
<script src="../../../_static/underscore.js"></script>
<script src="../../../_static/doctools.js"></script>
<script src="../../../_static/clipboard.min.js"></script>
<script src="../../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script>DOCUMENTATION_OPTIONS.pagename = '_modules/pyspark/mllib/classification';</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/_modules/pyspark/mllib/classification.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="None">
<!-- Matomo -->
<script type="text/javascript">
// Matomo analytics bootstrap: queue tracker commands on window._paq and load
// the tracker script asynchronously.
// Reuse an existing window._paq queue if one is already defined; otherwise
// start with an empty array.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
// Base URL shared by the tracking endpoint and the tracker script.
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '40']);
// Create a <script> element for matomo.js, mark it async, and insert it
// before the first <script> element found in the document.
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a class="skip-link" href="#main-content">Skip to main content</a>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../../../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<nav class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../../../index.html">
<img src="../../../_static/spark-logo-light.png" class="logo__image only-light" alt="Logo image"/>
<script>document.write(`<img src="../../../_static/spark-logo-dark.png" class="logo__image only-dark" alt="Logo image"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item"><nav class="navbar-nav">
<p class="sidebar-header-items__title"
role="heading"
aria-level="1"
aria-label="Site Navigation">
Site Navigation
</p>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../index.html">
Overview
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../getting_started/index.html">
Getting Started
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../user_guide/index.html">
User Guides
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../reference/index.html">
API Reference
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../development/index.html">
Development
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../migration_guide/index.html">
Migration Guides
</a>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
document.write(`
<button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
</button>
`);
</script>
</div>
<div class="navbar-item"><!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<div id="version-button" class="dropdown">
<button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown">
4.0.0-preview1
<span class="caret"></span>
</button>
<div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
<script type="text/javascript">
// Build the Python API docs homepage URL for one version entry.
//   entry.version - value substituted for the "{version}" placeholder
// Returns the resulting URL string.
function buildURL(entry) {
  // URL pattern supplied by jinja
  const urlPattern = "https://spark.apache.org/docs/{version}/api/python/index.html";
  return urlPattern.replace("{version}", entry.version);
}
// Click handler for a version-switcher link.
// Checks (via a HEAD request) whether the page currently being viewed also
// exists in the other docs version and, if so, navigates there; otherwise
// navigates to the other version's homepage (the link's own href).
//   event - the click event; the link is read from event.currentTarget,
//           falling back to event.target
// Returns false so the browser's default link navigation is suppressed while
// the check is in flight (true if the link has no href, so the default runs).
function checkPageExistsAndRedirect(event) {
  const currentFilePath = "_modules/pyspark/mllib/classification.html";
  // currentTarget is the element the handler is attached to, so this keeps
  // working even if the link ever gains child elements.
  const link = event.currentTarget || event.target;
  const otherDocsHomepage = link.getAttribute("href");
  if (!otherDocsHomepage) {
    return true;
  }
  // The homepage href ends in "index.html". Appending the page path to it
  // directly yields ".../index.html_modules/...", which can never exist, so
  // strip the file name first and append to the docs root directory instead.
  const otherDocsRoot = otherDocsHomepage.replace(/index\.html$/, "");
  const tryUrl = `${otherDocsRoot}${currentFilePath}`;
  $.ajax({
    type: "HEAD",
    url: tryUrl,
    // if the page exists, go there
    success: function () {
      location.href = tryUrl;
    }
  }).fail(function () {
    // page is missing in that version (or the probe failed): go to its homepage
    location.href = otherDocsHomepage;
  });
  return false;
}
// Populate the version switcher dropdown that belongs to this <script>.
// Fetches the list of documentation versions and appends one link per entry;
// each link initially points at that version's docs homepage and uses
// checkPageExistsAndRedirect as its click handler.
(function () {
  // This widget is rendered more than once on the page with identical element
  // ids, and an id lookup only ever resolves to the first copy. Locate the
  // menu relative to the running <script> element so each copy fills its own
  // dropdown; fall back to the id lookup if that is not possible.
  // document.currentScript must be read here, synchronously, because it is no
  // longer set when the AJAX callback runs.
  const ownScript = document.currentScript;
  const nearbyMenu = ownScript ? $(ownScript).parent().find(".dropdown-menu") : $();
  const menu = nearbyMenu.length ? nearbyMenu : $("#version_switcher");
  // get JSON config
  $.getJSON("https://spark.apache.org/static/versions.json", function (data) {
    // create the nodes first (before AJAX calls) to ensure the order is
    // correct (for now, links will go to doc version homepage)
    $.each(data, function (index, entry) {
      // if no custom name specified (e.g., "latest"), use version string
      if (!("name" in entry)) {
        entry.name = entry.version;
      }
      // construct the appropriate URL, and add it to the dropdown
      entry.url = buildURL(entry);
      const node = document.createElement("a");
      node.setAttribute("class", "list-group-item list-group-item-action py-1");
      node.setAttribute("href", `${entry.url}`);
      node.textContent = `${entry.name}`;
      node.onclick = checkPageExistsAndRedirect;
      menu.append(node);
    });
  });
})();
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span>
<span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span>
<span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github"></i></span>
<label class="sr-only">GitHub</label></a>
</li>
<li class="nav-item">
<a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box"></i></span>
<label class="sr-only">PyPI</label></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
document.write(`
<button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
</button>
`);
</script>
</div>
</div>
</nav>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar hide-on-wide">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item"><nav class="navbar-nav">
<p class="sidebar-header-items__title"
role="heading"
aria-level="1"
aria-label="Site Navigation">
Site Navigation
</p>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../index.html">
Overview
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../getting_started/index.html">
Getting Started
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../user_guide/index.html">
User Guides
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../reference/index.html">
API Reference
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../development/index.html">
Development
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../migration_guide/index.html">
Migration Guides
</a>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item"><!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<div id="version-button" class="dropdown">
<button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown">
4.0.0-preview1
<span class="caret"></span>
</button>
<div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
<script type="text/javascript">
// Build the Python API docs homepage URL for one version entry.
//   entry.version - value substituted for the "{version}" placeholder
// Returns the resulting URL string.
function buildURL(entry) {
  // URL pattern supplied by jinja
  const urlPattern = "https://spark.apache.org/docs/{version}/api/python/index.html";
  return urlPattern.replace("{version}", entry.version);
}
// Click handler for a version-switcher link.
// Checks (via a HEAD request) whether the page currently being viewed also
// exists in the other docs version and, if so, navigates there; otherwise
// navigates to the other version's homepage (the link's own href).
//   event - the click event; the link is read from event.currentTarget,
//           falling back to event.target
// Returns false so the browser's default link navigation is suppressed while
// the check is in flight (true if the link has no href, so the default runs).
function checkPageExistsAndRedirect(event) {
  const currentFilePath = "_modules/pyspark/mllib/classification.html";
  // currentTarget is the element the handler is attached to, so this keeps
  // working even if the link ever gains child elements.
  const link = event.currentTarget || event.target;
  const otherDocsHomepage = link.getAttribute("href");
  if (!otherDocsHomepage) {
    return true;
  }
  // The homepage href ends in "index.html". Appending the page path to it
  // directly yields ".../index.html_modules/...", which can never exist, so
  // strip the file name first and append to the docs root directory instead.
  const otherDocsRoot = otherDocsHomepage.replace(/index\.html$/, "");
  const tryUrl = `${otherDocsRoot}${currentFilePath}`;
  $.ajax({
    type: "HEAD",
    url: tryUrl,
    // if the page exists, go there
    success: function () {
      location.href = tryUrl;
    }
  }).fail(function () {
    // page is missing in that version (or the probe failed): go to its homepage
    location.href = otherDocsHomepage;
  });
  return false;
}
// Populate the version switcher dropdown that belongs to this <script>.
// Fetches the list of documentation versions and appends one link per entry;
// each link initially points at that version's docs homepage and uses
// checkPageExistsAndRedirect as its click handler.
(function () {
  // This widget is rendered more than once on the page with identical element
  // ids, and an id lookup only ever resolves to the first copy. Locate the
  // menu relative to the running <script> element so each copy fills its own
  // dropdown; fall back to the id lookup if that is not possible.
  // document.currentScript must be read here, synchronously, because it is no
  // longer set when the AJAX callback runs.
  const ownScript = document.currentScript;
  const nearbyMenu = ownScript ? $(ownScript).parent().find(".dropdown-menu") : $();
  const menu = nearbyMenu.length ? nearbyMenu : $("#version_switcher");
  // get JSON config
  $.getJSON("https://spark.apache.org/static/versions.json", function (data) {
    // create the nodes first (before AJAX calls) to ensure the order is
    // correct (for now, links will go to doc version homepage)
    $.each(data, function (index, entry) {
      // if no custom name specified (e.g., "latest"), use version string
      if (!("name" in entry)) {
        entry.name = entry.version;
      }
      // construct the appropriate URL, and add it to the dropdown
      entry.url = buildURL(entry);
      const node = document.createElement("a");
      node.setAttribute("class", "list-group-item list-group-item-action py-1");
      node.setAttribute("href", `${entry.url}`);
      node.textContent = `${entry.name}`;
      node.onclick = checkPageExistsAndRedirect;
      menu.append(node);
    });
  });
})();
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span>
<span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span>
<span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github"></i></span>
<label class="sr-only">GitHub</label></a>
</li>
<li class="nav-item">
<a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box"></i></span>
<label class="sr-only">PyPI</label></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumbs">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../../../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="../../index.html" class="nav-link">Module code</a></li>
<li class="breadcrumb-item active" aria-current="page">pyspark.mllib.classification</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article" role="main">
<h1>Source code for pyspark.mllib.classification</h1><div class="highlight"><pre>
<span></span><span class="c1">#</span>
<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
<span class="c1"># this work for additional information regarding copyright ownership.</span>
<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
<span class="c1"># (the &quot;License&quot;); you may not use this file except in compliance with</span>
<span class="c1"># the License. You may obtain a copy of the License at</span>
<span class="c1">#</span>
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="c1">#</span>
<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
<span class="c1"># distributed under the License is distributed on an &quot;AS IS&quot; BASIS,</span>
<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
<span class="c1"># See the License for the specific language governing permissions and</span>
<span class="c1"># limitations under the License.</span>
<span class="c1">#</span>
<span class="kn">from</span> <span class="nn">math</span> <span class="kn">import</span> <span class="n">exp</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">warnings</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Iterable</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">overload</span><span class="p">,</span> <span class="n">TYPE_CHECKING</span>
<span class="kn">import</span> <span class="nn">numpy</span>
<span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">RDD</span><span class="p">,</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">since</span>
<span class="kn">from</span> <span class="nn">pyspark.streaming.dstream</span> <span class="kn">import</span> <span class="n">DStream</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib.common</span> <span class="kn">import</span> <span class="n">callMLlibFunc</span><span class="p">,</span> <span class="n">_py2java</span><span class="p">,</span> <span class="n">_java2py</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib.linalg</span> <span class="kn">import</span> <span class="n">_convert_to_vector</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib.regression</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">LabeledPoint</span><span class="p">,</span>
<span class="n">LinearModel</span><span class="p">,</span>
<span class="n">_regression_train_wrapper</span><span class="p">,</span>
<span class="n">StreamingLinearAlgorithm</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib.util</span> <span class="kn">import</span> <span class="n">Saveable</span><span class="p">,</span> <span class="n">Loader</span><span class="p">,</span> <span class="n">inherit_doc</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib.linalg</span> <span class="kn">import</span> <span class="n">Vector</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib.regression</span> <span class="kn">import</span> <span class="n">LabeledPoint</span>
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib._typing</span> <span class="kn">import</span> <span class="n">VectorLike</span>
<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span>
<span class="s2">&quot;LogisticRegressionModel&quot;</span><span class="p">,</span>
<span class="s2">&quot;LogisticRegressionWithSGD&quot;</span><span class="p">,</span>
<span class="s2">&quot;LogisticRegressionWithLBFGS&quot;</span><span class="p">,</span>
<span class="s2">&quot;SVMModel&quot;</span><span class="p">,</span>
<span class="s2">&quot;SVMWithSGD&quot;</span><span class="p">,</span>
<span class="s2">&quot;NaiveBayesModel&quot;</span><span class="p">,</span>
<span class="s2">&quot;NaiveBayes&quot;</span><span class="p">,</span>
<span class="s2">&quot;StreamingLogisticRegressionWithSGD&quot;</span><span class="p">,</span>
<span class="p">]</span>
<span class="k">class</span> <span class="nc">LinearClassificationModel</span><span class="p">(</span><span class="n">LinearModel</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A private abstract class representing a multiclass classification</span>
<span class="sd"> model. The categories are represented by int values: 0, 1, 2, etc.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">weights</span><span class="p">:</span> <span class="n">Vector</span><span class="p">,</span> <span class="n">intercept</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="nb">super</span><span class="p">(</span><span class="n">LinearClassificationModel</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">weights</span><span class="p">,</span> <span class="n">intercept</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_threshold</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">setThreshold</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sets the threshold that separates positive predictions from</span>
<span class="sd"> negative predictions. An example with prediction score greater</span>
<span class="sd"> than or equal to this threshold is identified as a positive,</span>
<span class="sd"> and negative otherwise. It is used for binary classification</span>
<span class="sd"> only.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_threshold</span> <span class="o">=</span> <span class="n">value</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">threshold</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the threshold (if any) used for converting raw</span>
<span class="sd"> prediction scores into 0/1 predictions. It is used for</span>
<span class="sd"> binary classification only.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_threshold</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">clearThreshold</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Clears the threshold so that `predict` will output raw</span>
<span class="sd"> prediction scores. It is used for binary classification only.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_threshold</span> <span class="o">=</span> <span class="kc">None</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">test</span><span class="p">:</span> <span class="s2">&quot;VectorLike&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">test</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">RDD</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]:</span>
<span class="o">...</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">test</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">,</span> <span class="n">RDD</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">]]</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">RDD</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]],</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Predict values for a single data point or an RDD of points</span>
<span class="sd"> using the model trained.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span>
<div class="viewcode-block" id="LogisticRegressionModel"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.LogisticRegressionModel.html#pyspark.mllib.classification.LogisticRegressionModel">[docs]</a><span class="k">class</span> <span class="nc">LogisticRegressionModel</span><span class="p">(</span><span class="n">LinearClassificationModel</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Classification model trained using Multinomial/Binary Logistic</span>
<span class="sd"> Regression.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> weights : :py:class:`pyspark.mllib.linalg.Vector`</span>
<span class="sd"> Weights computed for every feature.</span>
<span class="sd"> intercept : float</span>
<span class="sd"> Intercept computed for this model. (Only used in Binary Logistic</span>
<span class="sd"> Regression. In Multinomial Logistic Regression, the intercepts will</span>
<span class="sd"> not be a single value, so the intercepts will be part of the</span>
<span class="sd"> weights.)</span>
<span class="sd"> numFeatures : int</span>
<span class="sd"> The dimension of the features.</span>
<span class="sd"> numClasses : int</span>
<span class="sd"> The number of possible outcomes for k classes classification problem</span>
<span class="sd"> in Multinomial Logistic Regression. By default, it is binary</span>
<span class="sd"> logistic regression so numClasses will be set to 2.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.mllib.linalg import SparseVector</span>
<span class="sd"> &gt;&gt;&gt; data = [</span>
<span class="sd"> ... LabeledPoint(0.0, [0.0, 1.0]),</span>
<span class="sd"> ... LabeledPoint(1.0, [1.0, 0.0]),</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; lrm = LogisticRegressionWithSGD.train(sc.parallelize(data), iterations=10)</span>
<span class="sd"> &gt;&gt;&gt; lrm.predict([1.0, 0.0])</span>
<span class="sd"> 1</span>
<span class="sd"> &gt;&gt;&gt; lrm.predict([0.0, 1.0])</span>
<span class="sd"> 0</span>
<span class="sd"> &gt;&gt;&gt; lrm.predict(sc.parallelize([[1.0, 0.0], [0.0, 1.0]])).collect()</span>
<span class="sd"> [1, 0]</span>
<span class="sd"> &gt;&gt;&gt; lrm.clearThreshold()</span>
<span class="sd"> &gt;&gt;&gt; lrm.predict([0.0, 1.0])</span>
<span class="sd"> 0.279...</span>
<span class="sd"> &gt;&gt;&gt; sparse_data = [</span>
<span class="sd"> ... LabeledPoint(0.0, SparseVector(2, {0: 0.0})),</span>
<span class="sd"> ... LabeledPoint(1.0, SparseVector(2, {1: 1.0})),</span>
<span class="sd"> ... LabeledPoint(0.0, SparseVector(2, {0: 1.0})),</span>
<span class="sd"> ... LabeledPoint(1.0, SparseVector(2, {1: 2.0}))</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; lrm = LogisticRegressionWithSGD.train(sc.parallelize(sparse_data), iterations=10)</span>
<span class="sd"> &gt;&gt;&gt; lrm.predict(numpy.array([0.0, 1.0]))</span>
<span class="sd"> 1</span>
<span class="sd"> &gt;&gt;&gt; lrm.predict(numpy.array([1.0, 0.0]))</span>
<span class="sd"> 0</span>
<span class="sd"> &gt;&gt;&gt; lrm.predict(SparseVector(2, {1: 1.0}))</span>
<span class="sd"> 1</span>
<span class="sd"> &gt;&gt;&gt; lrm.predict(SparseVector(2, {0: 1.0}))</span>
<span class="sd"> 0</span>
<span class="sd"> &gt;&gt;&gt; import os, tempfile</span>
<span class="sd"> &gt;&gt;&gt; path = tempfile.mkdtemp()</span>
<span class="sd"> &gt;&gt;&gt; lrm.save(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; sameModel = LogisticRegressionModel.load(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; sameModel.predict(numpy.array([0.0, 1.0]))</span>
<span class="sd"> 1</span>
<span class="sd"> &gt;&gt;&gt; sameModel.predict(SparseVector(2, {0: 1.0}))</span>
<span class="sd"> 0</span>
<span class="sd"> &gt;&gt;&gt; from shutil import rmtree</span>
<span class="sd"> &gt;&gt;&gt; try:</span>
<span class="sd"> ... rmtree(path)</span>
<span class="sd"> ... except BaseException:</span>
<span class="sd"> ... pass</span>
<span class="sd"> &gt;&gt;&gt; multi_class_data = [</span>
<span class="sd"> ... LabeledPoint(0.0, [0.0, 1.0, 0.0]),</span>
<span class="sd"> ... LabeledPoint(1.0, [1.0, 0.0, 0.0]),</span>
<span class="sd"> ... LabeledPoint(2.0, [0.0, 0.0, 1.0])</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; data = sc.parallelize(multi_class_data)</span>
<span class="sd"> &gt;&gt;&gt; mcm = LogisticRegressionWithLBFGS.train(data, iterations=10, numClasses=3)</span>
<span class="sd"> &gt;&gt;&gt; mcm.predict([0.0, 0.5, 0.0])</span>
<span class="sd"> 0</span>
<span class="sd"> &gt;&gt;&gt; mcm.predict([0.8, 0.0, 0.0])</span>
<span class="sd"> 1</span>
<span class="sd"> &gt;&gt;&gt; mcm.predict([0.0, 0.0, 0.3])</span>
<span class="sd"> 2</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">weights</span><span class="p">:</span> <span class="n">Vector</span><span class="p">,</span> <span class="n">intercept</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">numFeatures</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">numClasses</span><span class="p">:</span> <span class="nb">int</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="nb">super</span><span class="p">(</span><span class="n">LogisticRegressionModel</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">weights</span><span class="p">,</span> <span class="n">intercept</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_numFeatures</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">numFeatures</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_numClasses</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">numClasses</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_threshold</span> <span class="o">=</span> <span class="mf">0.5</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_numClasses</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_dataWithBiasSize</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_weightsMatrix</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_dataWithBiasSize</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coeff</span><span class="o">.</span><span class="n">size</span> <span class="o">//</span> <span class="p">(</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_numClasses</span> <span class="o">-</span> <span class="mi">1</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_weightsMatrix</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coeff</span><span class="o">.</span><span class="n">toArray</span><span class="p">()</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_numClasses</span> <span class="o">-</span> <span class="mi">1</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dataWithBiasSize</span>
<span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">numFeatures</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Dimension of the features.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_numFeatures</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">numClasses</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Number of possible outcomes for a k-class classification problem</span>
<span class="sd"> in Multinomial Logistic Regression.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_numClasses</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="s2">&quot;VectorLike&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">RDD</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]:</span>
<span class="o">...</span>
<div class="viewcode-block" id="LogisticRegressionModel.predict"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.LogisticRegressionModel.html#pyspark.mllib.classification.LogisticRegressionModel.predict">[docs]</a> <span class="k">def</span> <span class="nf">predict</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">,</span> <span class="n">RDD</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">]]</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">RDD</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]],</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Predict values for a single data point or an RDD of points</span>
<span class="sd"> using the trained model.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span>
<span class="k">return</span> <span class="n">x</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">v</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">v</span><span class="p">))</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">numClasses</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
<span class="n">margin</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">weights</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">_intercept</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="k">if</span> <span class="n">margin</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">prob</span> <span class="o">=</span> <span class="mi">1</span> <span class="o">/</span> <span class="p">(</span><span class="mi">1</span> <span class="o">+</span> <span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="n">margin</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">exp_margin</span> <span class="o">=</span> <span class="n">exp</span><span class="p">(</span><span class="n">margin</span><span class="p">)</span>
<span class="n">prob</span> <span class="o">=</span> <span class="n">exp_margin</span> <span class="o">/</span> <span class="p">(</span><span class="mi">1</span> <span class="o">+</span> <span class="n">exp_margin</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_threshold</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="n">prob</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">prob</span> <span class="o">&gt;</span> <span class="bp">self</span><span class="o">.</span><span class="n">_threshold</span> <span class="k">else</span> <span class="mi">0</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_weightsMatrix</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">best_class</span> <span class="o">=</span> <span class="mi">0</span>
<span class="n">max_margin</span> <span class="o">=</span> <span class="mf">0.0</span>
<span class="k">if</span> <span class="n">x</span><span class="o">.</span><span class="n">size</span> <span class="o">+</span> <span class="mi">1</span> <span class="o">==</span> <span class="bp">self</span><span class="o">.</span><span class="n">_dataWithBiasSize</span><span class="p">:</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_numClasses</span> <span class="o">-</span> <span class="mi">1</span><span class="p">):</span>
<span class="n">margin</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">x</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_weightsMatrix</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="mi">0</span> <span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">size</span><span class="p">])</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">_weightsMatrix</span><span class="p">[</span><span class="n">i</span><span class="p">][</span><span class="n">x</span><span class="o">.</span><span class="n">size</span><span class="p">]</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">margin</span> <span class="o">&gt;</span> <span class="n">max_margin</span><span class="p">:</span>
<span class="n">max_margin</span> <span class="o">=</span> <span class="n">margin</span>
<span class="n">best_class</span> <span class="o">=</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_numClasses</span> <span class="o">-</span> <span class="mi">1</span><span class="p">):</span>
<span class="n">margin</span> <span class="o">=</span> <span class="n">x</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_weightsMatrix</span><span class="p">[</span><span class="n">i</span><span class="p">])</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="k">if</span> <span class="n">margin</span> <span class="o">&gt;</span> <span class="n">max_margin</span><span class="p">:</span>
<span class="n">max_margin</span> <span class="o">=</span> <span class="n">margin</span>
<span class="n">best_class</span> <span class="o">=</span> <span class="n">i</span> <span class="o">+</span> <span class="mi">1</span>
<span class="k">return</span> <span class="n">best_class</span></div>
<div class="viewcode-block" id="LogisticRegressionModel.save"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.LogisticRegressionModel.html#pyspark.mllib.classification.LogisticRegressionModel.save">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Save this model to the given path.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">classification</span><span class="o">.</span><span class="n">LogisticRegressionModel</span><span class="p">(</span>
<span class="n">_py2java</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coeff</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">intercept</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">numFeatures</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">numClasses</span>
<span class="p">)</span>
<span class="n">java_model</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="LogisticRegressionModel.load"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.LogisticRegressionModel.html#pyspark.mllib.classification.LogisticRegressionModel.load">[docs]</a> <span class="nd">@classmethod</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;LogisticRegressionModel&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Load a model from the given path.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">classification</span><span class="o">.</span><span class="n">LogisticRegressionModel</span><span class="o">.</span><span class="n">load</span><span class="p">(</span>
<span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span>
<span class="p">)</span>
<span class="n">weights</span> <span class="o">=</span> <span class="n">_java2py</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">java_model</span><span class="o">.</span><span class="n">weights</span><span class="p">())</span>
<span class="n">intercept</span> <span class="o">=</span> <span class="n">java_model</span><span class="o">.</span><span class="n">intercept</span><span class="p">()</span>
<span class="n">numFeatures</span> <span class="o">=</span> <span class="n">java_model</span><span class="o">.</span><span class="n">numFeatures</span><span class="p">()</span>
<span class="n">numClasses</span> <span class="o">=</span> <span class="n">java_model</span><span class="o">.</span><span class="n">numClasses</span><span class="p">()</span>
<span class="n">threshold</span> <span class="o">=</span> <span class="n">java_model</span><span class="o">.</span><span class="n">getThreshold</span><span class="p">()</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">LogisticRegressionModel</span><span class="p">(</span><span class="n">weights</span><span class="p">,</span> <span class="n">intercept</span><span class="p">,</span> <span class="n">numFeatures</span><span class="p">,</span> <span class="n">numClasses</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">setThreshold</span><span class="p">(</span><span class="n">threshold</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span></div>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="k">return</span> <span class="p">(</span>
<span class="s2">&quot;pyspark.mllib.LogisticRegressionModel: intercept = </span><span class="si">{}</span><span class="s2">, &quot;</span>
<span class="s2">&quot;numFeatures = </span><span class="si">{}</span><span class="s2">, numClasses = </span><span class="si">{}</span><span class="s2">, threshold = </span><span class="si">{}</span><span class="s2">&quot;</span>
<span class="p">)</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_intercept</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_numFeatures</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_numClasses</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_threshold</span><span class="p">)</span></div>
<div class="viewcode-block" id="LogisticRegressionWithSGD"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.LogisticRegressionWithSGD.html#pyspark.mllib.classification.LogisticRegressionWithSGD">[docs]</a><span class="k">class</span> <span class="nc">LogisticRegressionWithSGD</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a classification model for Binary Logistic Regression using Stochastic Gradient Descent.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> .. deprecated:: 2.0.0</span>
<span class="sd"> Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="LogisticRegressionWithSGD.train"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.LogisticRegressionWithSGD.html#pyspark.mllib.classification.LogisticRegressionWithSGD.train">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span>
<span class="bp">cls</span><span class="p">,</span>
<span class="n">data</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span>
<span class="n">iterations</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span>
<span class="n">step</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span>
<span class="n">miniBatchFraction</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span>
<span class="n">initialWeights</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">regParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.01</span><span class="p">,</span>
<span class="n">regType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;l2&quot;</span><span class="p">,</span>
<span class="n">intercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">validateData</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">convergenceTol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.001</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">LogisticRegressionModel</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a logistic regression model on the given data.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> data : :py:class:`pyspark.RDD`</span>
<span class="sd"> The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`.</span>
<span class="sd"> iterations : int, optional</span>
<span class="sd"> The number of iterations.</span>
<span class="sd"> (default: 100)</span>
<span class="sd"> step : float, optional</span>
<span class="sd"> The step parameter used in SGD.</span>
<span class="sd"> (default: 1.0)</span>
<span class="sd"> miniBatchFraction : float, optional</span>
<span class="sd"> Fraction of data to be used for each SGD iteration.</span>
<span class="sd"> (default: 1.0)</span>
<span class="sd"> initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional</span>
<span class="sd"> The initial weights.</span>
<span class="sd"> (default: None)</span>
<span class="sd"> regParam : float, optional</span>
<span class="sd"> The regularizer parameter.</span>
<span class="sd"> (default: 0.01)</span>
<span class="sd"> regType : str, optional</span>
<span class="sd"> The type of regularizer used for training our model.</span>
<span class="sd"> Supported values:</span>
<span class="sd"> - &quot;l1&quot; for using L1 regularization</span>
<span class="sd"> - &quot;l2&quot; for using L2 regularization (default)</span>
<span class="sd"> - None for no regularization</span>
<span class="sd"> intercept : bool, optional</span>
<span class="sd"> Boolean parameter which indicates whether to use the</span>
<span class="sd"> augmented representation for training data (i.e., whether bias</span>
<span class="sd"> features are activated or not).</span>
<span class="sd"> (default: False)</span>
<span class="sd"> validateData : bool, optional</span>
<span class="sd"> Boolean parameter which indicates if the algorithm should</span>
<span class="sd"> validate data before training.</span>
<span class="sd"> (default: True)</span>
<span class="sd"> convergenceTol : float, optional</span>
<span class="sd"> A condition which decides iteration termination.</span>
<span class="sd"> (default: 0.001)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;Deprecated in 2.0.0. Use ml.classification.LogisticRegression or &quot;</span>
<span class="s2">&quot;LogisticRegressionWithLBFGS.&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span><span class="n">rdd</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span> <span class="n">i</span><span class="p">:</span> <span class="n">Vector</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Any</span><span class="p">]:</span>
<span class="k">return</span> <span class="n">callMLlibFunc</span><span class="p">(</span>
<span class="s2">&quot;trainLogisticRegressionModelWithSGD&quot;</span><span class="p">,</span>
<span class="n">rdd</span><span class="p">,</span>
<span class="nb">int</span><span class="p">(</span><span class="n">iterations</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">step</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">miniBatchFraction</span><span class="p">),</span>
<span class="n">i</span><span class="p">,</span>
<span class="nb">float</span><span class="p">(</span><span class="n">regParam</span><span class="p">),</span>
<span class="n">regType</span><span class="p">,</span>
<span class="nb">bool</span><span class="p">(</span><span class="n">intercept</span><span class="p">),</span>
<span class="nb">bool</span><span class="p">(</span><span class="n">validateData</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">convergenceTol</span><span class="p">),</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">_regression_train_wrapper</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">LogisticRegressionModel</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">initialWeights</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="LogisticRegressionWithLBFGS"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.LogisticRegressionWithLBFGS.html#pyspark.mllib.classification.LogisticRegressionWithLBFGS">[docs]</a><span class="k">class</span> <span class="nc">LogisticRegressionWithLBFGS</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a classification model for Multinomial/Binary Logistic Regression</span>
<span class="sd"> using Limited-memory BFGS.</span>
<span class="sd"> Standard feature scaling and L2 regularization are used by default.</span>
<span class="sd"> .. versionadded:: 1.2.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="LogisticRegressionWithLBFGS.train"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.LogisticRegressionWithLBFGS.html#pyspark.mllib.classification.LogisticRegressionWithLBFGS.train">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span>
<span class="bp">cls</span><span class="p">,</span>
<span class="n">data</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span>
<span class="n">iterations</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span>
<span class="n">initialWeights</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">regParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span>
<span class="n">regType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;l2&quot;</span><span class="p">,</span>
<span class="n">intercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">corrections</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10</span><span class="p">,</span>
<span class="n">tolerance</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1e-6</span><span class="p">,</span>
<span class="n">validateData</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">numClasses</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">2</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">LogisticRegressionModel</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a logistic regression model on the given data.</span>
<span class="sd"> .. versionadded:: 1.2.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> data : :py:class:`pyspark.RDD`</span>
<span class="sd"> The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`.</span>
<span class="sd"> iterations : int, optional</span>
<span class="sd"> The number of iterations.</span>
<span class="sd"> (default: 100)</span>
<span class="sd"> initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional</span>
<span class="sd"> The initial weights.</span>
<span class="sd"> (default: None)</span>
<span class="sd"> regParam : float, optional</span>
<span class="sd"> The regularizer parameter.</span>
<span class="sd"> (default: 0.0)</span>
<span class="sd"> regType : str, optional</span>
<span class="sd"> The type of regularizer used for training our model.</span>
<span class="sd"> Supported values:</span>
<span class="sd"> - &quot;l1&quot; for using L1 regularization</span>
<span class="sd"> - &quot;l2&quot; for using L2 regularization (default)</span>
<span class="sd"> - None for no regularization</span>
<span class="sd"> intercept : bool, optional</span>
<span class="sd"> Boolean parameter which indicates the use or not of the</span>
<span class="sd"> augmented representation for training data (i.e., whether bias</span>
<span class="sd"> features are activated or not).</span>
<span class="sd"> (default: False)</span>
<span class="sd"> corrections : int, optional</span>
<span class="sd"> The number of corrections used in the LBFGS update.</span>
<span class="sd"> If a known updater is used for binary classification,</span>
<span class="sd"> it calls the ml implementation and this parameter will</span>
<span class="sd"> have no effect. (default: 10)</span>
<span class="sd"> tolerance : float, optional</span>
<span class="sd"> The convergence tolerance of iterations for L-BFGS.</span>
<span class="sd"> (default: 1e-6)</span>
<span class="sd"> validateData : bool, optional</span>
<span class="sd"> Boolean parameter which indicates if the algorithm should</span>
<span class="sd"> validate data before training.</span>
<span class="sd"> (default: True)</span>
<span class="sd"> numClasses : int, optional</span>
<span class="sd"> The number of classes (i.e., outcomes) a label can take in</span>
<span class="sd"> Multinomial Logistic Regression.</span>
<span class="sd"> (default: 2)</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; data = [</span>
<span class="sd"> ... LabeledPoint(0.0, [0.0, 1.0]),</span>
<span class="sd"> ... LabeledPoint(1.0, [1.0, 0.0]),</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; lrm = LogisticRegressionWithLBFGS.train(sc.parallelize(data), iterations=10)</span>
<span class="sd"> &gt;&gt;&gt; lrm.predict([1.0, 0.0])</span>
<span class="sd"> 1</span>
<span class="sd"> &gt;&gt;&gt; lrm.predict([0.0, 1.0])</span>
<span class="sd"> 0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span><span class="n">rdd</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span> <span class="n">i</span><span class="p">:</span> <span class="n">Vector</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Any</span><span class="p">]:</span>
<span class="k">return</span> <span class="n">callMLlibFunc</span><span class="p">(</span>
<span class="s2">&quot;trainLogisticRegressionModelWithLBFGS&quot;</span><span class="p">,</span>
<span class="n">rdd</span><span class="p">,</span>
<span class="nb">int</span><span class="p">(</span><span class="n">iterations</span><span class="p">),</span>
<span class="n">i</span><span class="p">,</span>
<span class="nb">float</span><span class="p">(</span><span class="n">regParam</span><span class="p">),</span>
<span class="n">regType</span><span class="p">,</span>
<span class="nb">bool</span><span class="p">(</span><span class="n">intercept</span><span class="p">),</span>
<span class="nb">int</span><span class="p">(</span><span class="n">corrections</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">tolerance</span><span class="p">),</span>
<span class="nb">bool</span><span class="p">(</span><span class="n">validateData</span><span class="p">),</span>
<span class="nb">int</span><span class="p">(</span><span class="n">numClasses</span><span class="p">),</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">initialWeights</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="n">numClasses</span> <span class="o">==</span> <span class="mi">2</span><span class="p">:</span>
<span class="n">initialWeights</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">first</span><span class="p">()</span><span class="o">.</span><span class="n">features</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">intercept</span><span class="p">:</span>
<span class="n">initialWeights</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">]</span> <span class="o">*</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">first</span><span class="p">()</span><span class="o">.</span><span class="n">features</span><span class="p">)</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span> <span class="o">*</span> <span class="p">(</span><span class="n">numClasses</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">initialWeights</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.0</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="o">.</span><span class="n">first</span><span class="p">()</span><span class="o">.</span><span class="n">features</span><span class="p">)</span> <span class="o">*</span> <span class="p">(</span><span class="n">numClasses</span> <span class="o">-</span> <span class="mi">1</span><span class="p">)</span>
<span class="k">return</span> <span class="n">_regression_train_wrapper</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">LogisticRegressionModel</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">initialWeights</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="SVMModel"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.SVMModel.html#pyspark.mllib.classification.SVMModel">[docs]</a><span class="k">class</span> <span class="nc">SVMModel</span><span class="p">(</span><span class="n">LinearClassificationModel</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Model for Support Vector Machines (SVMs).</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> weights : :py:class:`pyspark.mllib.linalg.Vector`</span>
<span class="sd"> Weights computed for every feature.</span>
<span class="sd"> intercept : float</span>
<span class="sd"> Intercept computed for this model.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.mllib.linalg import SparseVector</span>
<span class="sd"> &gt;&gt;&gt; data = [</span>
<span class="sd"> ... LabeledPoint(0.0, [0.0]),</span>
<span class="sd"> ... LabeledPoint(1.0, [1.0]),</span>
<span class="sd"> ... LabeledPoint(1.0, [2.0]),</span>
<span class="sd"> ... LabeledPoint(1.0, [3.0])</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; svm = SVMWithSGD.train(sc.parallelize(data), iterations=10)</span>
<span class="sd"> &gt;&gt;&gt; svm.predict([1.0])</span>
<span class="sd"> 1</span>
<span class="sd"> &gt;&gt;&gt; svm.predict(sc.parallelize([[1.0]])).collect()</span>
<span class="sd"> [1]</span>
<span class="sd"> &gt;&gt;&gt; svm.clearThreshold()</span>
<span class="sd"> &gt;&gt;&gt; svm.predict(numpy.array([1.0]))</span>
<span class="sd"> 1.44...</span>
<span class="sd"> &gt;&gt;&gt; sparse_data = [</span>
<span class="sd"> ... LabeledPoint(0.0, SparseVector(2, {0: -1.0})),</span>
<span class="sd"> ... LabeledPoint(1.0, SparseVector(2, {1: 1.0})),</span>
<span class="sd"> ... LabeledPoint(0.0, SparseVector(2, {0: 0.0})),</span>
<span class="sd"> ... LabeledPoint(1.0, SparseVector(2, {1: 2.0}))</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; svm = SVMWithSGD.train(sc.parallelize(sparse_data), iterations=10)</span>
<span class="sd"> &gt;&gt;&gt; svm.predict(SparseVector(2, {1: 1.0}))</span>
<span class="sd"> 1</span>
<span class="sd"> &gt;&gt;&gt; svm.predict(SparseVector(2, {0: -1.0}))</span>
<span class="sd"> 0</span>
<span class="sd"> &gt;&gt;&gt; import os, tempfile</span>
<span class="sd"> &gt;&gt;&gt; path = tempfile.mkdtemp()</span>
<span class="sd"> &gt;&gt;&gt; svm.save(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; sameModel = SVMModel.load(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; sameModel.predict(SparseVector(2, {1: 1.0}))</span>
<span class="sd"> 1</span>
<span class="sd"> &gt;&gt;&gt; sameModel.predict(SparseVector(2, {0: -1.0}))</span>
<span class="sd"> 0</span>
<span class="sd"> &gt;&gt;&gt; from shutil import rmtree</span>
<span class="sd"> &gt;&gt;&gt; try:</span>
<span class="sd"> ... rmtree(path)</span>
<span class="sd"> ... except OSError:</span>
<span class="sd"> ... pass</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">weights</span><span class="p">:</span> <span class="n">Vector</span><span class="p">,</span> <span class="n">intercept</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="nb">super</span><span class="p">(</span><span class="n">SVMModel</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">weights</span><span class="p">,</span> <span class="n">intercept</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_threshold</span> <span class="o">=</span> <span class="mf">0.0</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="s2">&quot;VectorLike&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">RDD</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]:</span>
<span class="o">...</span>
<div class="viewcode-block" id="SVMModel.predict"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.SVMModel.html#pyspark.mllib.classification.SVMModel.predict">[docs]</a> <span class="k">def</span> <span class="nf">predict</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">,</span> <span class="n">RDD</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">]]</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">RDD</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]],</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Predict values for a single data point or an RDD of points</span>
<span class="sd"> using the model trained.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span>
<span class="k">return</span> <span class="n">x</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">v</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">v</span><span class="p">))</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="n">margin</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">weights</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">x</span><span class="p">)</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">intercept</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_threshold</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="n">margin</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">margin</span> <span class="o">&gt;</span> <span class="bp">self</span><span class="o">.</span><span class="n">_threshold</span> <span class="k">else</span> <span class="mi">0</span></div>
<div class="viewcode-block" id="SVMModel.save"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.SVMModel.html#pyspark.mllib.classification.SVMModel.save">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Save this model to the given path.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">classification</span><span class="o">.</span><span class="n">SVMModel</span><span class="p">(</span>
<span class="n">_py2java</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_coeff</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">intercept</span>
<span class="p">)</span>
<span class="n">java_model</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="SVMModel.load"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.SVMModel.html#pyspark.mllib.classification.SVMModel.load">[docs]</a> <span class="nd">@classmethod</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;SVMModel&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Load a model from the given path.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">classification</span><span class="o">.</span><span class="n">SVMModel</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span><span class="p">)</span>
<span class="n">weights</span> <span class="o">=</span> <span class="n">_java2py</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">java_model</span><span class="o">.</span><span class="n">weights</span><span class="p">())</span>
<span class="n">intercept</span> <span class="o">=</span> <span class="n">java_model</span><span class="o">.</span><span class="n">intercept</span><span class="p">()</span>
<span class="n">threshold</span> <span class="o">=</span> <span class="n">java_model</span><span class="o">.</span><span class="n">getThreshold</span><span class="p">()</span><span class="o">.</span><span class="n">get</span><span class="p">()</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">SVMModel</span><span class="p">(</span><span class="n">weights</span><span class="p">,</span> <span class="n">intercept</span><span class="p">)</span>
<span class="n">model</span><span class="o">.</span><span class="n">setThreshold</span><span class="p">(</span><span class="n">threshold</span><span class="p">)</span>
<span class="k">return</span> <span class="n">model</span></div></div>
<div class="viewcode-block" id="SVMWithSGD"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.SVMWithSGD.html#pyspark.mllib.classification.SVMWithSGD">[docs]</a><span class="k">class</span> <span class="nc">SVMWithSGD</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a Support Vector Machine (SVM) using Stochastic Gradient Descent.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="SVMWithSGD.train"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.SVMWithSGD.html#pyspark.mllib.classification.SVMWithSGD.train">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span>
<span class="bp">cls</span><span class="p">,</span>
<span class="n">data</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span>
<span class="n">iterations</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">100</span><span class="p">,</span>
<span class="n">step</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span>
<span class="n">regParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.01</span><span class="p">,</span>
<span class="n">miniBatchFraction</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span>
<span class="n">initialWeights</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">regType</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;l2&quot;</span><span class="p">,</span>
<span class="n">intercept</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">validateData</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">convergenceTol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.001</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SVMModel</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a support vector machine on the given data.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> data : :py:class:`pyspark.RDD`</span>
<span class="sd"> The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`.</span>
<span class="sd"> iterations : int, optional</span>
<span class="sd"> The number of iterations.</span>
<span class="sd"> (default: 100)</span>
<span class="sd"> step : float, optional</span>
<span class="sd"> The step parameter used in SGD.</span>
<span class="sd"> (default: 1.0)</span>
<span class="sd"> regParam : float, optional</span>
<span class="sd"> The regularizer parameter.</span>
<span class="sd"> (default: 0.01)</span>
<span class="sd"> miniBatchFraction : float, optional</span>
<span class="sd"> Fraction of data to be used for each SGD iteration.</span>
<span class="sd"> (default: 1.0)</span>
<span class="sd"> initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional</span>
<span class="sd"> The initial weights.</span>
<span class="sd"> (default: None)</span>
<span class="sd"> regType : str, optional</span>
<span class="sd"> The type of regularizer used for training our model.</span>
<span class="sd"> Allowed values:</span>
<span class="sd"> - &quot;l1&quot; for using L1 regularization</span>
<span class="sd"> - &quot;l2&quot; for using L2 regularization (default)</span>
<span class="sd"> - None for no regularization</span>
<span class="sd"> intercept : bool, optional</span>
<span class="sd"> Boolean parameter which indicates the use or not of the</span>
<span class="sd"> augmented representation for training data (i.e., whether bias</span>
<span class="sd"> features are activated or not).</span>
<span class="sd"> (default: False)</span>
<span class="sd"> validateData : bool, optional</span>
<span class="sd"> Boolean parameter which indicates if the algorithm should</span>
<span class="sd"> validate data before training.</span>
<span class="sd"> (default: True)</span>
<span class="sd"> convergenceTol : float, optional</span>
<span class="sd"> A condition which decides iteration termination.</span>
<span class="sd"> (default: 0.001)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span><span class="n">rdd</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span> <span class="n">i</span><span class="p">:</span> <span class="n">Vector</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Any</span><span class="p">]:</span>
<span class="k">return</span> <span class="n">callMLlibFunc</span><span class="p">(</span>
<span class="s2">&quot;trainSVMModelWithSGD&quot;</span><span class="p">,</span>
<span class="n">rdd</span><span class="p">,</span>
<span class="nb">int</span><span class="p">(</span><span class="n">iterations</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">step</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">regParam</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">miniBatchFraction</span><span class="p">),</span>
<span class="n">i</span><span class="p">,</span>
<span class="n">regType</span><span class="p">,</span>
<span class="nb">bool</span><span class="p">(</span><span class="n">intercept</span><span class="p">),</span>
<span class="nb">bool</span><span class="p">(</span><span class="n">validateData</span><span class="p">),</span>
<span class="nb">float</span><span class="p">(</span><span class="n">convergenceTol</span><span class="p">),</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">_regression_train_wrapper</span><span class="p">(</span><span class="n">train</span><span class="p">,</span> <span class="n">SVMModel</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">initialWeights</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="NaiveBayesModel"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.NaiveBayesModel.html#pyspark.mllib.classification.NaiveBayesModel">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">NaiveBayesModel</span><span class="p">(</span><span class="n">Saveable</span><span class="p">,</span> <span class="n">Loader</span><span class="p">[</span><span class="s2">&quot;NaiveBayesModel&quot;</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Model for Naive Bayes classifiers.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> labels : :py:class:`numpy.ndarray`</span>
<span class="sd"> List of labels.</span>
<span class="sd"> pi : :py:class:`numpy.ndarray`</span>
<span class="sd"> Log of class priors, whose dimension is C, number of labels.</span>
<span class="sd"> theta : :py:class:`numpy.ndarray`</span>
<span class="sd"> Log of class conditional probabilities, whose dimension is C-by-D,</span>
<span class="sd"> where D is number of features.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.mllib.linalg import SparseVector</span>
<span class="sd"> &gt;&gt;&gt; data = [</span>
<span class="sd"> ... LabeledPoint(0.0, [0.0, 0.0]),</span>
<span class="sd"> ... LabeledPoint(0.0, [0.0, 1.0]),</span>
<span class="sd"> ... LabeledPoint(1.0, [1.0, 0.0]),</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; model = NaiveBayes.train(sc.parallelize(data))</span>
<span class="sd"> &gt;&gt;&gt; model.predict(numpy.array([0.0, 1.0]))</span>
<span class="sd"> 0.0</span>
<span class="sd"> &gt;&gt;&gt; model.predict(numpy.array([1.0, 0.0]))</span>
<span class="sd"> 1.0</span>
<span class="sd"> &gt;&gt;&gt; model.predict(sc.parallelize([[1.0, 0.0]])).collect()</span>
<span class="sd"> [1.0]</span>
<span class="sd"> &gt;&gt;&gt; sparse_data = [</span>
<span class="sd"> ... LabeledPoint(0.0, SparseVector(2, {1: 0.0})),</span>
<span class="sd"> ... LabeledPoint(0.0, SparseVector(2, {1: 1.0})),</span>
<span class="sd"> ... LabeledPoint(1.0, SparseVector(2, {0: 1.0}))</span>
<span class="sd"> ... ]</span>
<span class="sd"> &gt;&gt;&gt; model = NaiveBayes.train(sc.parallelize(sparse_data))</span>
<span class="sd"> &gt;&gt;&gt; model.predict(SparseVector(2, {1: 1.0}))</span>
<span class="sd"> 0.0</span>
<span class="sd"> &gt;&gt;&gt; model.predict(SparseVector(2, {0: 1.0}))</span>
<span class="sd"> 1.0</span>
<span class="sd"> &gt;&gt;&gt; import os, tempfile</span>
<span class="sd"> &gt;&gt;&gt; path = tempfile.mkdtemp()</span>
<span class="sd"> &gt;&gt;&gt; model.save(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; sameModel = NaiveBayesModel.load(sc, path)</span>
<span class="sd"> &gt;&gt;&gt; sameModel.predict(SparseVector(2, {0: 1.0})) == model.predict(SparseVector(2, {0: 1.0}))</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; from shutil import rmtree</span>
<span class="sd"> &gt;&gt;&gt; try:</span>
<span class="sd"> ... rmtree(path)</span>
<span class="sd"> ... except OSError:</span>
<span class="sd"> ... pass</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">labels</span><span class="p">:</span> <span class="n">numpy</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="n">pi</span><span class="p">:</span> <span class="n">numpy</span><span class="o">.</span><span class="n">ndarray</span><span class="p">,</span> <span class="n">theta</span><span class="p">:</span> <span class="n">numpy</span><span class="o">.</span><span class="n">ndarray</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">labels</span> <span class="o">=</span> <span class="n">labels</span>
<span class="bp">self</span><span class="o">.</span><span class="n">pi</span> <span class="o">=</span> <span class="n">pi</span>
<span class="bp">self</span><span class="o">.</span><span class="n">theta</span> <span class="o">=</span> <span class="n">theta</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="s2">&quot;VectorLike&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">numpy</span><span class="o">.</span><span class="n">float64</span><span class="p">:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">RDD</span><span class="p">[</span><span class="n">numpy</span><span class="o">.</span><span class="n">float64</span><span class="p">]:</span>
<span class="o">...</span>
<div class="viewcode-block" id="NaiveBayesModel.predict"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.NaiveBayesModel.html#pyspark.mllib.classification.NaiveBayesModel.predict">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;0.9.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">,</span> <span class="n">RDD</span><span class="p">[</span><span class="s2">&quot;VectorLike&quot;</span><span class="p">]]</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">numpy</span><span class="o">.</span><span class="n">float64</span><span class="p">,</span> <span class="n">RDD</span><span class="p">[</span><span class="n">numpy</span><span class="o">.</span><span class="n">float64</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the most likely class for a data vector</span>
<span class="sd"> or an RDD of vectors.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">RDD</span><span class="p">):</span>
<span class="k">return</span> <span class="n">x</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">v</span><span class="p">:</span> <span class="bp">self</span><span class="o">.</span><span class="n">predict</span><span class="p">(</span><span class="n">v</span><span class="p">))</span>
<span class="n">x</span> <span class="o">=</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="p">[</span>
<span class="n">numpy</span><span class="o">.</span><span class="n">argmax</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">pi</span> <span class="o">+</span> <span class="n">x</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">theta</span><span class="o">.</span><span class="n">transpose</span><span class="p">()))</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="p">]</span></div>
<div class="viewcode-block" id="NaiveBayesModel.save"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.NaiveBayesModel.html#pyspark.mllib.classification.NaiveBayesModel.save">[docs]</a> <span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Save this model to the given path.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_labels</span> <span class="o">=</span> <span class="n">_py2java</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
<span class="n">java_pi</span> <span class="o">=</span> <span class="n">_py2java</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">pi</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
<span class="n">java_theta</span> <span class="o">=</span> <span class="n">_py2java</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">theta</span><span class="o">.</span><span class="n">tolist</span><span class="p">())</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">classification</span><span class="o">.</span><span class="n">NaiveBayesModel</span><span class="p">(</span>
<span class="n">java_labels</span><span class="p">,</span> <span class="n">java_pi</span><span class="p">,</span> <span class="n">java_theta</span>
<span class="p">)</span>
<span class="n">java_model</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="NaiveBayesModel.load"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.NaiveBayesModel.html#pyspark.mllib.classification.NaiveBayesModel.load">[docs]</a> <span class="nd">@classmethod</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;NaiveBayesModel&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Load a model from the given path.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">classification</span><span class="o">.</span><span class="n">NaiveBayesModel</span><span class="o">.</span><span class="n">load</span><span class="p">(</span>
<span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">sc</span><span class="p">(),</span> <span class="n">path</span>
<span class="p">)</span>
<span class="c1"># Cannot unpickle array.array from Pickle in Python 3 with &quot;bytes&quot; encoding, so &quot;latin1&quot; is used.</span>
<span class="n">py_labels</span> <span class="o">=</span> <span class="n">_java2py</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">java_model</span><span class="o">.</span><span class="n">labels</span><span class="p">(),</span> <span class="s2">&quot;latin1&quot;</span><span class="p">)</span>
<span class="n">py_pi</span> <span class="o">=</span> <span class="n">_java2py</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">java_model</span><span class="o">.</span><span class="n">pi</span><span class="p">(),</span> <span class="s2">&quot;latin1&quot;</span><span class="p">)</span>
<span class="n">py_theta</span> <span class="o">=</span> <span class="n">_java2py</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">java_model</span><span class="o">.</span><span class="n">theta</span><span class="p">(),</span> <span class="s2">&quot;latin1&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">NaiveBayesModel</span><span class="p">(</span><span class="n">py_labels</span><span class="p">,</span> <span class="n">py_pi</span><span class="p">,</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">py_theta</span><span class="p">))</span></div></div>
<div class="viewcode-block" id="NaiveBayes"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.NaiveBayes.html#pyspark.mllib.classification.NaiveBayes">[docs]</a><span class="k">class</span> <span class="nc">NaiveBayes</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a Multinomial Naive Bayes model.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="NaiveBayes.train"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.NaiveBayes.html#pyspark.mllib.classification.NaiveBayes.train">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">train</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">data</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">],</span> <span class="n">lambda_</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">NaiveBayesModel</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train a Naive Bayes model given an RDD of (label, features)</span>
<span class="sd"> vectors.</span>
<span class="sd"> This is the `Multinomial NB &lt;http://tinyurl.com/lsdw6p&gt;`_ which</span>
<span class="sd"> can handle all kinds of discrete data. For example, by</span>
<span class="sd"> converting documents into TF-IDF vectors, it can be used for</span>
<span class="sd"> document classification. By making every vector a 0-1 vector,</span>
<span class="sd"> it can also be used as `Bernoulli NB &lt;http://tinyurl.com/p7c96j6&gt;`_.</span>
<span class="sd"> The input feature values must be nonnegative.</span>
<span class="sd"> .. versionadded:: 0.9.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> data : :py:class:`pyspark.RDD`</span>
<span class="sd"> The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`.</span>
<span class="sd"> lambda\\_ : float, optional</span>
<span class="sd"> The smoothing parameter.</span>
<span class="sd"> (default: 1.0)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">first</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">first</span><span class="p">,</span> <span class="n">LabeledPoint</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;`data` should be an RDD of LabeledPoint&quot;</span><span class="p">)</span>
<span class="n">labels</span><span class="p">,</span> <span class="n">pi</span><span class="p">,</span> <span class="n">theta</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s2">&quot;trainNaiveBayesModel&quot;</span><span class="p">,</span> <span class="n">data</span><span class="p">,</span> <span class="n">lambda_</span><span class="p">)</span>
<span class="k">return</span> <span class="n">NaiveBayesModel</span><span class="p">(</span><span class="n">labels</span><span class="o">.</span><span class="n">toArray</span><span class="p">(),</span> <span class="n">pi</span><span class="o">.</span><span class="n">toArray</span><span class="p">(),</span> <span class="n">numpy</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">theta</span><span class="p">))</span></div></div>
<div class="viewcode-block" id="StreamingLogisticRegressionWithSGD"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.html#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">StreamingLogisticRegressionWithSGD</span><span class="p">(</span><span class="n">StreamingLinearAlgorithm</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Train or predict a logistic regression model on streaming data.</span>
<span class="sd"> Training uses Stochastic Gradient Descent to update the model based on</span>
<span class="sd"> each new batch of incoming data from a DStream.</span>
<span class="sd"> Each batch of data is assumed to be an RDD of LabeledPoints.</span>
<span class="sd"> The number of data points per batch can vary, but the number</span>
<span class="sd"> of features must be constant. An initial weight</span>
<span class="sd"> vector must be provided.</span>
<span class="sd"> .. versionadded:: 1.5.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> stepSize : float, optional</span>
<span class="sd"> Step size for each iteration of gradient descent.</span>
<span class="sd"> (default: 0.1)</span>
<span class="sd"> numIterations : int, optional</span>
<span class="sd"> Number of iterations run for each batch of data.</span>
<span class="sd"> (default: 50)</span>
<span class="sd"> miniBatchFraction : float, optional</span>
<span class="sd"> Fraction of each batch of data to use for updates.</span>
<span class="sd"> (default: 1.0)</span>
<span class="sd"> regParam : float, optional</span>
<span class="sd"> L2 Regularization parameter.</span>
<span class="sd"> (default: 0.0)</span>
<span class="sd"> convergenceTol : float, optional</span>
<span class="sd"> Value used to determine when to terminate iterations.</span>
<span class="sd"> (default: 0.001)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">stepSize</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.1</span><span class="p">,</span>
<span class="n">numIterations</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">50</span><span class="p">,</span>
<span class="n">miniBatchFraction</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1.0</span><span class="p">,</span>
<span class="n">regParam</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.0</span><span class="p">,</span>
<span class="n">convergenceTol</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">0.001</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">stepSize</span> <span class="o">=</span> <span class="n">stepSize</span>
<span class="bp">self</span><span class="o">.</span><span class="n">numIterations</span> <span class="o">=</span> <span class="n">numIterations</span>
<span class="bp">self</span><span class="o">.</span><span class="n">regParam</span> <span class="o">=</span> <span class="n">regParam</span>
<span class="bp">self</span><span class="o">.</span><span class="n">miniBatchFraction</span> <span class="o">=</span> <span class="n">miniBatchFraction</span>
<span class="bp">self</span><span class="o">.</span><span class="n">convergenceTol</span> <span class="o">=</span> <span class="n">convergenceTol</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_model</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">LogisticRegressionModel</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="nb">super</span><span class="p">(</span><span class="n">StreamingLogisticRegressionWithSGD</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">model</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_model</span><span class="p">)</span>
<div class="viewcode-block" id="StreamingLogisticRegressionWithSGD.setInitialWeights"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.html#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.setInitialWeights">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.5.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">setInitialWeights</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">initialWeights</span><span class="p">:</span> <span class="s2">&quot;VectorLike&quot;</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;StreamingLogisticRegressionWithSGD&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Set the initial value of weights.</span>
<span class="sd"> This must be set before running trainOn and predictOn.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">initialWeights</span> <span class="o">=</span> <span class="n">_convert_to_vector</span><span class="p">(</span><span class="n">initialWeights</span><span class="p">)</span>
<span class="c1"># LogisticRegressionWithSGD does only binary classification.</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_model</span> <span class="o">=</span> <span class="n">LogisticRegressionModel</span><span class="p">(</span>
<span class="n">initialWeights</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">initialWeights</span><span class="o">.</span><span class="n">size</span><span class="p">,</span> <span class="mi">2</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="StreamingLogisticRegressionWithSGD.trainOn"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.html#pyspark.mllib.classification.StreamingLogisticRegressionWithSGD.trainOn">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.5.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">trainOn</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dstream</span><span class="p">:</span> <span class="s2">&quot;DStream[LabeledPoint]&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Train the model on the incoming dstream.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_validate</span><span class="p">(</span><span class="n">dstream</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">update</span><span class="p">(</span><span class="n">rdd</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">LabeledPoint</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># LogisticRegressionWithSGD.train raises an error for an empty RDD.</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">rdd</span><span class="o">.</span><span class="n">isEmpty</span><span class="p">():</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_model</span> <span class="o">=</span> <span class="n">LogisticRegressionWithSGD</span><span class="o">.</span><span class="n">train</span><span class="p">(</span>
<span class="n">rdd</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">numIterations</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">stepSize</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">miniBatchFraction</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_model</span><span class="o">.</span><span class="n">weights</span><span class="p">,</span> <span class="c1"># type: ignore[union-attr]</span>
<span class="n">regParam</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">regParam</span><span class="p">,</span>
<span class="n">convergenceTol</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">convergenceTol</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">dstream</span><span class="o">.</span><span class="n">foreachRDD</span><span class="p">(</span><span class="n">update</span><span class="p">)</span></div></div>
<span class="k">def</span> <span class="nf">_test</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">doctest</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span>
<span class="kn">import</span> <span class="nn">pyspark.mllib.classification</span>
<span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">classification</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">spark</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">&quot;local[4]&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">&quot;mllib.classification tests&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span>
<span class="p">)</span>
<span class="n">globs</span><span class="p">[</span><span class="s2">&quot;sc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">sparkContext</span>
<span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span><span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span> <span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span><span class="p">)</span>
<span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span>
<span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span>
<span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="n">_test</span><span class="p">()</span>
</pre></div>
</article>
<footer class="bd-footer-article">
<div class="footer-article-items footer-article__inner">
<div class="footer-article-item"><!-- Previous / next buttons -->
<div class="prev-next-area">
</div></div>
</div>
</footer>
</div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52"></script>
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item"><p class="copyright">
Copyright © 2024 The Apache Software Foundation, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
</p></div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.5.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item"><p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.13.3.
</p></div>
</div>
</div>
</footer>
</body>
</html>