blob: f754d5ee9af8b5b19d58f98bec6d9bab81f5f809 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>pyspark.ml.base &#8212; PySpark 3.5.3 documentation</title>
<link href="../../../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link rel="stylesheet"
href="../../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet" href="../../../_static/styles/pydata-sphinx-theme.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf">
<script id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
<script src="../../../_static/jquery.js"></script>
<script src="../../../_static/underscore.js"></script>
<script src="../../../_static/doctools.js"></script>
<script src="../../../_static/language_data.js"></script>
<script src="../../../_static/clipboard.min.js"></script>
<script src="../../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/_modules/pyspark/ml/base.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en">
<!-- Google Analytics -->
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<div class="container-fluid" id="banner"></div>
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main"><div class="container-xl">
<div id="navbar-start">
<a class="navbar-brand" href="../../../index.html">
<img src="../../../_static/spark-logo-reverse.png" class="logo" alt="PySpark documentation home">
</a>
</div>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-collapsible" aria-controls="navbar-collapsible" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-collapsible" class="col-lg-9 collapse navbar-collapse">
<div id="navbar-center" class="mr-auto">
<div class="navbar-center-item">
<ul id="navbar-main-elements" class="navbar-nav">
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../../../index.html">
Overview
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../../../getting_started/index.html">
Getting Started
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../../../user_guide/index.html">
User Guides
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../../../reference/index.html">
API Reference
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../../../development/index.html">
Development
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../../../migration_guide/index.html">
Migration Guides
</a>
</li>
</ul>
</div>
</div>
<div id="navbar-end">
<div class="navbar-end-item">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<div id="version-button" class="dropdown">
<button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown">
3.5.3
<span class="caret"></span>
</button>
<div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
<script type="text/javascript">
/**
 * Build the documentation homepage URL for one entry of the versions JSON.
 *
 * @param {{version: string}} entry - Version descriptor; only `version` is read.
 * @returns {string} The URL pattern with its first "{version}" placeholder
 *     replaced by `entry.version`.
 */
function buildURL(entry) {
  // URL pattern supplied by jinja when the page is rendered.
  const urlPattern = "https://spark.apache.org/docs/{version}/api/python/index.html";
  return urlPattern.replace("{version}", entry.version);
}
/**
 * Click handler for a version-switcher link: go to the equivalent of the
 * current page in the selected docs version if it exists there, otherwise go
 * to that version's homepage.
 *
 * @param {Event} event - Click event whose target is a switcher link; the
 *     link's `href` attribute is the other version's homepage URL.
 * @returns {boolean} Always false, so the link's default navigation is
 *     cancelled and the destination is decided by the HEAD probe below.
 */
function checkPageExistsAndRedirect(event) {
  const currentFilePath = "_modules/pyspark/ml/base.html";
  const otherDocsHomepage = event.target.getAttribute("href");
  // The homepage URL ends in a file name (".../api/python/index.html"), so the
  // page path has to be appended to its directory. Appending to the full URL
  // yields ".../index.html_modules/..." and the probe can never succeed.
  // A homepage URL that already ends in "/" is left unchanged.
  const otherDocsRoot = otherDocsHomepage.replace(/[^\/]*\.html$/, "");
  const tryUrl = `${otherDocsRoot}${currentFilePath}`;
  $.ajax({
    type: "HEAD",
    url: tryUrl,
    // The page exists in the other version: go there.
    success: function () {
      location.href = tryUrl;
    },
  }).fail(function () {
    // The probe failed (e.g. no such page): fall back to the homepage.
    location.href = otherDocsHomepage;
  });
  return false;
}
// Populate the version switcher dropdown from the published versions JSON.
(() => {
  $.getJSON("https://spark.apache.org/static/versions.json", (versions) => {
    const menu = $("#version_switcher");
    // Links are created here, in JSON order, before any per-link AJAX probe
    // runs. Each one initially points at its version's homepage.
    $.each(versions, (_position, entry) => {
      // When an entry carries no custom name (a custom name would be
      // e.g. "latest"), label it with its version string.
      if (!("name" in entry)) {
        entry.name = entry.version;
      }
      entry.url = buildURL(entry);

      const link = document.createElement("a");
      link.className = "list-group-item list-group-item-action py-1";
      link.setAttribute("href", String(entry.url));
      link.textContent = String(entry.name);
      link.onclick = checkPageExistsAndRedirect;
      menu.append(link);
    });
  });
})();
</script>
</div>
</div>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<!-- Only show if we have sidebars configured, else just a small margin -->
<div class="col-12 col-md-3 bd-sidebar">
<div class="sidebar-start-items"><form class="bd-search d-flex align-items-center" action="../../../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form><nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
</div>
</nav>
</div>
<div class="sidebar-end-items">
</div>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<h1>Source code for pyspark.ml.base</h1><div class="highlight"><pre>
<span></span><span class="c1">#</span>
<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
<span class="c1"># this work for additional information regarding copyright ownership.</span>
<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
<span class="c1"># (the &quot;License&quot;); you may not use this file except in compliance with</span>
<span class="c1"># the License. You may obtain a copy of the License at</span>
<span class="c1">#</span>
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="c1">#</span>
<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
<span class="c1"># distributed under the License is distributed on an &quot;AS IS&quot; BASIS,</span>
<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
<span class="c1"># See the License for the specific language governing permissions and</span>
<span class="c1"># limitations under the License.</span>
<span class="c1">#</span>
<span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span>
<span class="kn">import</span> <span class="nn">copy</span>
<span class="kn">import</span> <span class="nn">threading</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">Any</span><span class="p">,</span>
<span class="n">Callable</span><span class="p">,</span>
<span class="n">Generic</span><span class="p">,</span>
<span class="n">Iterator</span><span class="p">,</span>
<span class="n">List</span><span class="p">,</span>
<span class="n">Optional</span><span class="p">,</span>
<span class="n">Sequence</span><span class="p">,</span>
<span class="n">Tuple</span><span class="p">,</span>
<span class="n">TypeVar</span><span class="p">,</span>
<span class="n">Union</span><span class="p">,</span>
<span class="n">cast</span><span class="p">,</span>
<span class="n">overload</span><span class="p">,</span>
<span class="n">TYPE_CHECKING</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">since</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.param</span> <span class="kn">import</span> <span class="n">P</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.common</span> <span class="kn">import</span> <span class="n">inherit_doc</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.param.shared</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">HasInputCol</span><span class="p">,</span>
<span class="n">HasOutputCol</span><span class="p">,</span>
<span class="n">HasLabelCol</span><span class="p">,</span>
<span class="n">HasFeaturesCol</span><span class="p">,</span>
<span class="n">HasPredictionCol</span><span class="p">,</span>
<span class="n">Params</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.dataframe</span> <span class="kn">import</span> <span class="n">DataFrame</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.functions</span> <span class="kn">import</span> <span class="n">udf</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="kn">import</span> <span class="n">DataType</span><span class="p">,</span> <span class="n">StructField</span><span class="p">,</span> <span class="n">StructType</span>
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.ml._typing</span> <span class="kn">import</span> <span class="n">ParamMap</span>
<span class="n">T</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;T&quot;</span><span class="p">)</span>
<span class="n">M</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;M&quot;</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="s2">&quot;Transformer&quot;</span><span class="p">)</span>
<span class="k">class</span> <span class="nc">_FitMultipleIterator</span><span class="p">(</span><span class="n">Generic</span><span class="p">[</span><span class="n">M</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Used by default implementation of Estimator.fitMultiple to produce models in a thread safe</span>
<span class="sd"> iterator. This class handles the simple case of fitMultiple where each param map should be</span>
<span class="sd"> fit independently.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> fitSingleModel : function</span>
<span class="sd"> Callable[[int], Transformer] which fits an estimator to a dataset.</span>
<span class="sd"> `fitSingleModel` may be called up to `numModels` times, with a unique index each time.</span>
<span class="sd"> Each call to `fitSingleModel` with an index should return the Model associated with</span>
<span class="sd"> that index.</span>
<span class="sd"> numModels : int</span>
<span class="sd"> Number of models this iterator should produce.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> See :py:meth:`Estimator.fitMultiple` for more info.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">fitSingleModel</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="nb">int</span><span class="p">],</span> <span class="n">M</span><span class="p">],</span> <span class="n">numModels</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot; &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">fitSingleModel</span> <span class="o">=</span> <span class="n">fitSingleModel</span>
<span class="bp">self</span><span class="o">.</span><span class="n">numModel</span> <span class="o">=</span> <span class="n">numModels</span>
<span class="bp">self</span><span class="o">.</span><span class="n">counter</span> <span class="o">=</span> <span class="mi">0</span>
<span class="bp">self</span><span class="o">.</span><span class="n">lock</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">Lock</span><span class="p">()</span>
<span class="k">def</span> <span class="fm">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterator</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">M</span><span class="p">]]:</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="k">def</span> <span class="fm">__next__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">M</span><span class="p">]:</span>
<span class="k">with</span> <span class="bp">self</span><span class="o">.</span><span class="n">lock</span><span class="p">:</span>
<span class="n">index</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">counter</span>
<span class="k">if</span> <span class="n">index</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">numModel</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">StopIteration</span><span class="p">(</span><span class="s2">&quot;No models remaining.&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">counter</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">return</span> <span class="n">index</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">fitSingleModel</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">next</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">M</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;For python2 compatibility.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="fm">__next__</span><span class="p">()</span>
<div class="viewcode-block" id="Estimator"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.Estimator.html#pyspark.ml.Estimator">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">Estimator</span><span class="p">(</span><span class="n">Params</span><span class="p">,</span> <span class="n">Generic</span><span class="p">[</span><span class="n">M</span><span class="p">],</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstract class for estimators that fit models to data.</span>
<span class="sd"> .. versionadded:: 1.3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">_fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">M</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Fits a model to the input dataset. This is called by the default implementation of fit.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> dataset : :py:class:`pyspark.sql.DataFrame`</span>
<span class="sd"> input dataset</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> :class:`Transformer`</span>
<span class="sd"> fitted model</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span>
<div class="viewcode-block" id="Estimator.fitMultiple"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.Estimator.html#pyspark.ml.Estimator.fitMultiple">[docs]</a> <span class="k">def</span> <span class="nf">fitMultiple</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">paramMaps</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;ParamMap&quot;</span><span class="p">]</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterator</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">M</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Fits a model to the input dataset for each param map in `paramMaps`.</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> dataset : :py:class:`pyspark.sql.DataFrame`</span>
<span class="sd"> input dataset.</span>
<span class="sd"> paramMaps : :py:class:`collections.abc.Sequence`</span>
<span class="sd"> A Sequence of param maps.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> :py:class:`_FitMultipleIterator`</span>
<span class="sd"> A thread safe iterable which contains one model for each param map. Each</span>
<span class="sd"> call to `next(modelIterator)` will return `(index, model)` where model was fit</span>
<span class="sd"> using `paramMaps[index]`. `index` values may not be sequential.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">estimator</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">fitSingleModel</span><span class="p">(</span><span class="n">index</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">M</span><span class="p">:</span>
<span class="k">return</span> <span class="n">estimator</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">paramMaps</span><span class="p">[</span><span class="n">index</span><span class="p">])</span>
<span class="k">return</span> <span class="n">_FitMultipleIterator</span><span class="p">(</span><span class="n">fitSingleModel</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">paramMaps</span><span class="p">))</span></div>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;ParamMap&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="o">...</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">M</span><span class="p">:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">fit</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="s2">&quot;ParamMap&quot;</span><span class="p">],</span> <span class="n">Tuple</span><span class="p">[</span><span class="s2">&quot;ParamMap&quot;</span><span class="p">]]</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="n">M</span><span class="p">]:</span>
<span class="o">...</span>
<div class="viewcode-block" id="Estimator.fit"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.Estimator.html#pyspark.ml.Estimator.fit">[docs]</a> <span class="k">def</span> <span class="nf">fit</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span>
<span class="n">params</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="s2">&quot;ParamMap&quot;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="s2">&quot;ParamMap&quot;</span><span class="p">],</span> <span class="n">Tuple</span><span class="p">[</span><span class="s2">&quot;ParamMap&quot;</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">M</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">M</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Fits a model to the input dataset with optional parameters.</span>
<span class="sd"> .. versionadded:: 1.3.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> dataset : :py:class:`pyspark.sql.DataFrame`</span>
<span class="sd"> input dataset.</span>
<span class="sd"> params : dict or list or tuple, optional</span>
<span class="sd"> an optional param map that overrides embedded params. If a list/tuple of</span>
<span class="sd"> param maps is given, this calls fit on each param map and returns a list of</span>
<span class="sd"> models.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> :py:class:`Transformer` or a list of :py:class:`Transformer`</span>
<span class="sd"> fitted model(s)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">params</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">params</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="p">(</span><span class="nb">list</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)):</span>
<span class="n">models</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Optional</span><span class="p">[</span><span class="n">M</span><span class="p">]]</span> <span class="o">=</span> <span class="p">[</span><span class="kc">None</span><span class="p">]</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
<span class="k">for</span> <span class="n">index</span><span class="p">,</span> <span class="n">model</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">fitMultiple</span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
<span class="n">models</span><span class="p">[</span><span class="n">index</span><span class="p">]</span> <span class="o">=</span> <span class="n">model</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">List</span><span class="p">[</span><span class="n">M</span><span class="p">],</span> <span class="n">models</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="k">if</span> <span class="n">params</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">params</span><span class="p">)</span><span class="o">.</span><span class="n">_fit</span><span class="p">(</span><span class="n">dataset</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_fit</span><span class="p">(</span><span class="n">dataset</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;Params must be either a param map or a list/tuple of param maps, &quot;</span>
<span class="s2">&quot;but got </span><span class="si">%s</span><span class="s2">.&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
<span class="p">)</span></div></div>
<div class="viewcode-block" id="Transformer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.Transformer.html#pyspark.ml.Transformer">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">Transformer</span><span class="p">(</span><span class="n">Params</span><span class="p">,</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstract class for transformers that transform one dataset into another.</span>
<span class="sd"> .. versionadded:: 1.3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">_transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Transforms the input dataset.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> dataset : :py:class:`pyspark.sql.DataFrame`</span>
<span class="sd"> input dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> :py:class:`pyspark.sql.DataFrame`</span>
<span class="sd"> transformed dataset</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span>
<div class="viewcode-block" id="Transformer.transform"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.Transformer.html#pyspark.ml.Transformer.transform">[docs]</a> <span class="k">def</span> <span class="nf">transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;ParamMap&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Transforms the input dataset with optional parameters.</span>
<span class="sd"> .. versionadded:: 1.3.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> dataset : :py:class:`pyspark.sql.DataFrame`</span>
<span class="sd"> input dataset</span>
<span class="sd"> params : dict, optional</span>
<span class="sd"> an optional param map that overrides embedded params.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> :py:class:`pyspark.sql.DataFrame`</span>
<span class="sd"> transformed dataset</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">params</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">params</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="k">if</span> <span class="n">params</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">params</span><span class="p">)</span><span class="o">.</span><span class="n">_transform</span><span class="p">(</span><span class="n">dataset</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_transform</span><span class="p">(</span><span class="n">dataset</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;Params must be a param map but got </span><span class="si">%s</span><span class="s2">.&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">params</span><span class="p">))</span></div></div>
<div class="viewcode-block" id="Model"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.Model.html#pyspark.ml.Model">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">Model</span><span class="p">(</span><span class="n">Transformer</span><span class="p">,</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstract class for models that are fitted by estimators.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">pass</span></div>
<div class="viewcode-block" id="UnaryTransformer"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.UnaryTransformer.html#pyspark.ml.UnaryTransformer">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">UnaryTransformer</span><span class="p">(</span><span class="n">HasInputCol</span><span class="p">,</span> <span class="n">HasOutputCol</span><span class="p">,</span> <span class="n">Transformer</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Abstract class for transformers that take one input column, apply transformation,</span>
<span class="sd"> and output the result as a new column.</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="UnaryTransformer.setInputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.UnaryTransformer.html#pyspark.ml.UnaryTransformer.setInputCol">[docs]</a> <span class="k">def</span> <span class="nf">setInputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">P</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sets the value of :py:attr:`inputCol`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div>
<div class="viewcode-block" id="UnaryTransformer.setOutputCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.UnaryTransformer.html#pyspark.ml.UnaryTransformer.setOutputCol">[docs]</a> <span class="k">def</span> <span class="nf">setOutputCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">P</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sets the value of :py:attr:`outputCol`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">outputCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div>
<div class="viewcode-block" id="UnaryTransformer.createTransformFunc"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.UnaryTransformer.html#pyspark.ml.UnaryTransformer.createTransformFunc">[docs]</a> <span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">createTransformFunc</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Creates the transform function using the given param map. The input param map already takes</span>
<span class="sd"> account of the embedded param map. So the param values should be determined</span>
<span class="sd"> solely by the input param map.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
<div class="viewcode-block" id="UnaryTransformer.outputDataType"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.UnaryTransformer.html#pyspark.ml.UnaryTransformer.outputDataType">[docs]</a> <span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">outputDataType</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataType</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the data type of the output column.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
<div class="viewcode-block" id="UnaryTransformer.validateInputType"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.UnaryTransformer.html#pyspark.ml.UnaryTransformer.validateInputType">[docs]</a> <span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">validateInputType</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">inputType</span><span class="p">:</span> <span class="n">DataType</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Validates the input type. Throws an exception if it is invalid.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div>
<div class="viewcode-block" id="UnaryTransformer.transformSchema"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.UnaryTransformer.html#pyspark.ml.UnaryTransformer.transformSchema">[docs]</a> <span class="k">def</span> <span class="nf">transformSchema</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">schema</span><span class="p">:</span> <span class="n">StructType</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">StructType</span><span class="p">:</span>
<span class="n">inputType</span> <span class="o">=</span> <span class="n">schema</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">getInputCol</span><span class="p">()]</span><span class="o">.</span><span class="n">dataType</span>
<span class="bp">self</span><span class="o">.</span><span class="n">validateInputType</span><span class="p">(</span><span class="n">inputType</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOutputCol</span><span class="p">()</span> <span class="ow">in</span> <span class="n">schema</span><span class="o">.</span><span class="n">names</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Output column </span><span class="si">%s</span><span class="s2"> already exists.&quot;</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">getOutputCol</span><span class="p">())</span>
<span class="n">outputFields</span> <span class="o">=</span> <span class="n">copy</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">schema</span><span class="o">.</span><span class="n">fields</span><span class="p">)</span>
<span class="n">outputFields</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">StructField</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">getOutputCol</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">outputDataType</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
<span class="k">return</span> <span class="n">StructType</span><span class="p">(</span><span class="n">outputFields</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_transform</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">transformSchema</span><span class="p">(</span><span class="n">dataset</span><span class="o">.</span><span class="n">schema</span><span class="p">)</span>
<span class="n">transformUDF</span> <span class="o">=</span> <span class="n">udf</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">createTransformFunc</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">outputDataType</span><span class="p">())</span>
<span class="n">transformedDataset</span> <span class="o">=</span> <span class="n">dataset</span><span class="o">.</span><span class="n">withColumn</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">getOutputCol</span><span class="p">(),</span> <span class="n">transformUDF</span><span class="p">(</span><span class="n">dataset</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">getInputCol</span><span class="p">()])</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">transformedDataset</span></div>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">_PredictorParams</span><span class="p">(</span><span class="n">HasLabelCol</span><span class="p">,</span> <span class="n">HasFeaturesCol</span><span class="p">,</span> <span class="n">HasPredictionCol</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Params for :py:class:`Predictor` and :py:class:`PredictionModel`.</span>
<span class="sd"> .. versionadded:: 3.0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">pass</span>
<div class="viewcode-block" id="Predictor"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.Predictor.html#pyspark.ml.Predictor">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">Predictor</span><span class="p">(</span><span class="n">Estimator</span><span class="p">[</span><span class="n">M</span><span class="p">],</span> <span class="n">_PredictorParams</span><span class="p">,</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Estimator for prediction tasks (regression and classification).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="Predictor.setLabelCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.Predictor.html#pyspark.ml.Predictor.setLabelCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;3.0.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">setLabelCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">P</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sets the value of :py:attr:`labelCol`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">labelCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div>
<div class="viewcode-block" id="Predictor.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.Predictor.html#pyspark.ml.Predictor.setFeaturesCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;3.0.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">P</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sets the value of :py:attr:`featuresCol`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div>
<div class="viewcode-block" id="Predictor.setPredictionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.Predictor.html#pyspark.ml.Predictor.setPredictionCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;3.0.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">setPredictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">P</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sets the value of :py:attr:`predictionCol`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">predictionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="PredictionModel"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.PredictionModel.html#pyspark.ml.PredictionModel">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">PredictionModel</span><span class="p">(</span><span class="n">Model</span><span class="p">,</span> <span class="n">_PredictorParams</span><span class="p">,</span> <span class="n">Generic</span><span class="p">[</span><span class="n">T</span><span class="p">],</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Model for prediction tasks (regression and classification).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="PredictionModel.setFeaturesCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.PredictionModel.html#pyspark.ml.PredictionModel.setFeaturesCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;3.0.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">setFeaturesCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">P</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sets the value of :py:attr:`featuresCol`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">featuresCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div>
<div class="viewcode-block" id="PredictionModel.setPredictionCol"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.PredictionModel.html#pyspark.ml.PredictionModel.setPredictionCol">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;3.0.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">setPredictionCol</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">P</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">P</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sets the value of :py:attr:`predictionCol`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_set</span><span class="p">(</span><span class="n">predictionCol</span><span class="o">=</span><span class="n">value</span><span class="p">)</span></div>
<span class="nd">@property</span>
<span class="nd">@abstractmethod</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;2.1.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">numFeatures</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the number of features the model was trained on. If unknown, returns -1.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span>
<div class="viewcode-block" id="PredictionModel.predict"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.PredictionModel.html#pyspark.ml.PredictionModel.predict">[docs]</a> <span class="nd">@abstractmethod</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;3.0.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">predict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">T</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Predict label for the given features.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">()</span></div></div>
</pre></div>
</div>
<!-- Previous / next buttons -->
<div class="prev-next-area">
</div>
</main>
</div>
</div>
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<div class="footer-item">
<p class="copyright">
&copy; Copyright .<br>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br>
</p>
</div>
</div>
</footer>
</body>
</html>