blob: 04220ee01edec33733fbbb6f27de8483377145f5 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>pyspark.ml.util &#8212; PySpark 3.5.2 documentation</title>
<link href="../../../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link rel="stylesheet"
href="../../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet" href="../../../_static/styles/pydata-sphinx-theme.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf">
<script id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
<script src="../../../_static/jquery.js"></script>
<script src="../../../_static/underscore.js"></script>
<script src="../../../_static/doctools.js"></script>
<script src="../../../_static/language_data.js"></script>
<script src="../../../_static/clipboard.min.js"></script>
<script src="../../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/_modules/pyspark/ml/util.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="None">
<!-- Google Analytics -->
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<div class="container-fluid" id="banner"></div>
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main"><div class="container-xl">
<div id="navbar-start">
<a class="navbar-brand" href="../../../index.html">
<img src="../../../_static/spark-logo-reverse.png" class="logo" alt="PySpark documentation home">
</a>
</div>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-collapsible" aria-controls="navbar-collapsible" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-collapsible" class="col-lg-9 collapse navbar-collapse">
<div id="navbar-center" class="mr-auto">
<div class="navbar-center-item">
<ul id="navbar-main-elements" class="navbar-nav">
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../../../index.html">
Overview
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../../../getting_started/index.html">
Getting Started
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../../../user_guide/index.html">
User Guides
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../../../reference/index.html">
API Reference
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../../../development/index.html">
Development
</a>
</li>
<li class="toctree-l1 nav-item">
<a class="reference internal nav-link" href="../../../migration_guide/index.html">
Migration Guides
</a>
</li>
</ul>
</div>
</div>
<div id="navbar-end">
<div class="navbar-end-item">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<div id="version-button" class="dropdown">
<button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown">
3.5.2
<span class="caret"></span>
</button>
<div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
<script type="text/javascript">
/**
 * Build the documentation URL for one entry of the versions JSON.
 *
 * Only `entry.version` is read; it is substituted for the "{version}"
 * placeholder of the URL template.
 *
 * @param {Object} entry - a version record with a `version` property.
 * @returns {string} the template with the placeholder filled in.
 */
function buildURL(entry) {
  const urlTemplate = "https://spark.apache.org/docs/{version}/api/python/index.html"; // supplied by jinja
  return urlTemplate.replace("{version}", entry.version);
}
/**
 * Click handler for a version-switcher link.
 *
 * Checks (with a HEAD request) whether the page currently being viewed also
 * exists in the docs version the clicked link points to. If it does, the
 * browser is sent to that page; otherwise it is sent to the link's own href
 * (the homepage of the other docs version).
 *
 * @param {Event} event - the click event; `event.target` must carry the
 *     `href` attribute of the other version's homepage.
 * @returns {boolean} always `false`, so that an `onclick` property handler
 *     cancels the link's default navigation while the request is in flight.
 */
function checkPageExistsAndRedirect(event) {
  const currentFilePath = "_modules/pyspark/ml/util.html",
        otherDocsHomepage = event.target.getAttribute("href");
  // The homepage href may end in "index.html". Appending the page path
  // directly to it would produce ".../index.html_modules/..." which can never
  // exist, so the probe would always fail. Strip that trailing file name to
  // obtain the docs root before appending; an href that already ends in "/"
  // is left unchanged.
  const otherDocsRoot = otherDocsHomepage.replace(/index\.html$/, "");
  let tryUrl = `${otherDocsRoot}${currentFilePath}`;
  $.ajax({
    type: 'HEAD',
    url: tryUrl,
    // if the page exists, go there
    success: function() {
      location.href = tryUrl;
    }
  }).fail(function() {
    // page is missing in the other version (or the probe failed):
    // fall back to that version's homepage
    location.href = otherDocsHomepage;
  });
  return false;
}
// Immediately-invoked function that fills the version switcher dropdown
(function () {
  // Append one link for a single versions.json entry to the dropdown.
  function addVersionLink(index, entry) {
    // entries without a custom name (e.g., "latest") are labelled with
    // their version string
    if (("name" in entry) === false) {
      entry.name = entry.version;
    }
    // remember the homepage URL of that docs version on the entry itself
    entry.url = buildURL(entry);

    const link = document.createElement("a");
    link.setAttribute("class", "list-group-item list-group-item-action py-1");
    link.setAttribute("href", String(entry.url));
    link.textContent = String(entry.name);
    // assigned as an onclick property: the handler returns false to cancel
    // the default navigation and redirects by itself
    link.onclick = checkPageExistsAndRedirect;
    $("#version_switcher").append(link);
  }

  // Fetch the JSON config. All nodes are created right away, in the order of
  // the JSON data and before any existence check runs, so the dropdown order
  // is stable (at this point every link targets a docs version homepage).
  $.getJSON("https://spark.apache.org/static/versions.json", function (versions) {
    $.each(versions, addVersionLink);
  });
})();
</script>
</div>
</div>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<!-- Only show if we have sidebars configured, else just a small margin -->
<div class="col-12 col-md-3 bd-sidebar">
<div class="sidebar-start-items"><form class="bd-search d-flex align-items-center" action="../../../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form><nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
</div>
</nav>
</div>
<div class="sidebar-end-items">
</div>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<h1>Source code for pyspark.ml.util</h1><div class="highlight"><pre>
<span></span><span class="c1">#</span>
<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
<span class="c1"># this work for additional information regarding copyright ownership.</span>
<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
<span class="c1"># (the &quot;License&quot;); you may not use this file except in compliance with</span>
<span class="c1"># the License. You may obtain a copy of the License at</span>
<span class="c1">#</span>
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="c1">#</span>
<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
<span class="c1"># distributed under the License is distributed on an &quot;AS IS&quot; BASIS,</span>
<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
<span class="c1"># See the License for the specific language governing permissions and</span>
<span class="c1"># limitations under the License.</span>
<span class="c1">#</span>
<span class="kn">import</span> <span class="nn">json</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">time</span>
<span class="kn">import</span> <span class="nn">uuid</span>
<span class="kn">import</span> <span class="nn">functools</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">Any</span><span class="p">,</span>
<span class="n">Callable</span><span class="p">,</span>
<span class="n">Dict</span><span class="p">,</span>
<span class="n">Generic</span><span class="p">,</span>
<span class="n">List</span><span class="p">,</span>
<span class="n">Optional</span><span class="p">,</span>
<span class="n">Sequence</span><span class="p">,</span>
<span class="n">Type</span><span class="p">,</span>
<span class="n">TypeVar</span><span class="p">,</span>
<span class="n">cast</span><span class="p">,</span>
<span class="n">TYPE_CHECKING</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">since</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.common</span> <span class="kn">import</span> <span class="n">inherit_doc</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.utils</span> <span class="kn">import</span> <span class="n">is_remote</span>
<span class="kn">from</span> <span class="nn">pyspark.util</span> <span class="kn">import</span> <span class="n">VersionUtils</span>
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">py4j.java_gateway</span> <span class="kn">import</span> <span class="n">JavaGateway</span><span class="p">,</span> <span class="n">JavaObject</span>
<span class="kn">from</span> <span class="nn">pyspark.ml._typing</span> <span class="kn">import</span> <span class="n">PipelineStage</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.base</span> <span class="kn">import</span> <span class="n">Params</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.wrapper</span> <span class="kn">import</span> <span class="n">JavaWrapper</span>
<span class="n">T</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;T&quot;</span><span class="p">)</span>
<span class="n">RW</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;RW&quot;</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="s2">&quot;BaseReadWrite&quot;</span><span class="p">)</span>
<span class="n">W</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;W&quot;</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="s2">&quot;MLWriter&quot;</span><span class="p">)</span>
<span class="n">JW</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;JW&quot;</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="s2">&quot;JavaMLWriter&quot;</span><span class="p">)</span>
<span class="n">RL</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;RL&quot;</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="s2">&quot;MLReadable&quot;</span><span class="p">)</span>
<span class="n">JR</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;JR&quot;</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="s2">&quot;JavaMLReader&quot;</span><span class="p">)</span>
<span class="n">FuncT</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;FuncT&quot;</span><span class="p">,</span> <span class="n">bound</span><span class="o">=</span><span class="n">Callable</span><span class="p">[</span><span class="o">...</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span>
<span class="k">def</span> <span class="nf">_jvm</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="s2">&quot;JavaGateway&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the JVM view associated with SparkContext. Must be called</span>
<span class="sd"> after SparkContext is initialized.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">jvm</span> <span class="o">=</span> <span class="n">SparkContext</span><span class="o">.</span><span class="n">_jvm</span>
<span class="k">if</span> <span class="n">jvm</span><span class="p">:</span>
<span class="k">return</span> <span class="n">jvm</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="s2">&quot;Cannot load _jvm from SparkContext. Is SparkContext initialized?&quot;</span><span class="p">)</span>
<div class="viewcode-block" id="Identifiable"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.Identifiable.html#pyspark.ml.Identifiable">[docs]</a><span class="k">class</span> <span class="nc">Identifiable</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Object with a unique ID.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1">#: A unique id for the object.</span>
<span class="bp">self</span><span class="o">.</span><span class="n">uid</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_randomUID</span><span class="p">()</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">_randomUID</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generate a unique string id for the object. The default implementation</span>
<span class="sd"> concatenates the class name, &quot;_&quot;, and 12 random hex chars.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="bp">cls</span><span class="o">.</span><span class="vm">__name__</span> <span class="o">+</span> <span class="s2">&quot;_&quot;</span> <span class="o">+</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">()</span><span class="o">.</span><span class="n">hex</span><span class="p">[</span><span class="o">-</span><span class="mi">12</span><span class="p">:])</span></div>
<div class="viewcode-block" id="BaseReadWrite"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.BaseReadWrite.html#pyspark.ml.BaseReadWrite">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">BaseReadWrite</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Base class for MLWriter and MLReader. Stores information about the SparkContext</span>
<span class="sd"> and SparkSession.</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sparkSession</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">SparkSession</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<div class="viewcode-block" id="BaseReadWrite.session"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.BaseReadWrite.html#pyspark.ml.BaseReadWrite.session">[docs]</a> <span class="k">def</span> <span class="nf">session</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">RW</span><span class="p">,</span> <span class="n">sparkSession</span><span class="p">:</span> <span class="n">SparkSession</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">RW</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sets the Spark Session to use for saving/loading.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sparkSession</span> <span class="o">=</span> <span class="n">sparkSession</span>
<span class="k">return</span> <span class="bp">self</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">sparkSession</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkSession</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the user-specified Spark Session or the default.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sparkSession</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sparkSession</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">_getActiveSessionOrCreate</span><span class="p">()</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sparkSession</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sparkSession</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">sc</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparkContext</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the underlying `SparkContext`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="bp">self</span><span class="o">.</span><span class="n">sparkSession</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">sparkSession</span><span class="o">.</span><span class="n">sparkContext</span></div>
<div class="viewcode-block" id="MLWriter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWriter.html#pyspark.ml.MLWriter">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">MLWriter</span><span class="p">(</span><span class="n">BaseReadWrite</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Utility class that can save ML instances.</span>
<span class="sd"> .. versionadded:: 2.0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="nb">super</span><span class="p">(</span><span class="n">MLWriter</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">shouldOverwrite</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="bp">self</span><span class="o">.</span><span class="n">optionMap</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">def</span> <span class="nf">_handleOverwrite</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.wrapper</span> <span class="kn">import</span> <span class="n">JavaWrapper</span>
<span class="n">_java_obj</span> <span class="o">=</span> <span class="n">JavaWrapper</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">&quot;org.apache.spark.ml.util.FileSystemOverwrite&quot;</span><span class="p">)</span>
<span class="n">wrapper</span> <span class="o">=</span> <span class="n">JavaWrapper</span><span class="p">(</span><span class="n">_java_obj</span><span class="p">)</span>
<span class="n">wrapper</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">&quot;handleOverwrite&quot;</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">sparkSession</span><span class="o">.</span><span class="n">_jsparkSession</span><span class="p">)</span>
<div class="viewcode-block" id="MLWriter.save"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWriter.html#pyspark.ml.MLWriter.save">[docs]</a> <span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Save the ML instance to the input path.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">shouldOverwrite</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_handleOverwrite</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">saveImpl</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="MLWriter.saveImpl"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWriter.html#pyspark.ml.MLWriter.saveImpl">[docs]</a> <span class="k">def</span> <span class="nf">saveImpl</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> save() handles overwriting and then calls this method. Subclasses should override this</span>
<span class="sd"> method to implement the actual saving of the instance.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;MLWriter is not yet implemented for type: </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div>
<div class="viewcode-block" id="MLWriter.overwrite"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWriter.html#pyspark.ml.MLWriter.overwrite">[docs]</a> <span class="k">def</span> <span class="nf">overwrite</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;MLWriter&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Overwrites if the output path already exists.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">shouldOverwrite</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="MLWriter.option"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWriter.html#pyspark.ml.MLWriter.option">[docs]</a> <span class="k">def</span> <span class="nf">option</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;MLWriter&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Adds an option to the underlying MLWriter. See the documentation for the specific model&#39;s</span>
<span class="sd"> writer for possible options. The option name (key) is case-insensitive.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">optionMap</span><span class="p">[</span><span class="n">key</span><span class="o">.</span><span class="n">lower</span><span class="p">()]</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div></div>
<div class="viewcode-block" id="GeneralMLWriter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.GeneralMLWriter.html#pyspark.ml.GeneralMLWriter">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">GeneralMLWriter</span><span class="p">(</span><span class="n">MLWriter</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Utility class that can save ML instances in different formats.</span>
<span class="sd"> .. versionadded:: 2.4.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="GeneralMLWriter.format"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.GeneralMLWriter.html#pyspark.ml.GeneralMLWriter.format">[docs]</a> <span class="k">def</span> <span class="nf">format</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;GeneralMLWriter&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Specifies the format of ML export (&quot;pmml&quot;, &quot;internal&quot;, or the fully qualified class</span>
<span class="sd"> name for export).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">source</span> <span class="o">=</span> <span class="n">source</span>
<span class="k">return</span> <span class="bp">self</span></div></div>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">JavaMLWriter</span><span class="p">(</span><span class="n">MLWriter</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> (Private) Specialization of :py:class:`MLWriter` for :py:class:`JavaParams` types</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">_jwrite</span><span class="p">:</span> <span class="s2">&quot;JavaObject&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instance</span><span class="p">:</span> <span class="s2">&quot;JavaMLWritable&quot;</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">JavaMLWriter</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="n">_java_obj</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">_to_java</span><span class="p">()</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span> <span class="o">=</span> <span class="n">_java_obj</span><span class="o">.</span><span class="n">write</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Save the ML instance to the input path.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;path should be a string, got type </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">path</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">overwrite</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;JavaMLWriter&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Overwrites if the output path already exists.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">overwrite</span><span class="p">()</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="k">def</span> <span class="nf">option</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;JavaMLWriter&quot;</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">option</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="k">def</span> <span class="nf">session</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sparkSession</span><span class="p">:</span> <span class="n">SparkSession</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;JavaMLWriter&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Sets the Spark Session to use for saving.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">session</span><span class="p">(</span><span class="n">sparkSession</span><span class="o">.</span><span class="n">_jsparkSession</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">GeneralJavaMLWriter</span><span class="p">(</span><span class="n">JavaMLWriter</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> (Private) Specialization of :py:class:`GeneralMLWriter` for :py:class:`JavaParams` types</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instance</span><span class="p">:</span> <span class="s2">&quot;JavaMLWritable&quot;</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">GeneralJavaMLWriter</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">instance</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">format</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;GeneralJavaMLWriter&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Specifies the format of ML export (&quot;pmml&quot;, &quot;internal&quot;, or the fully qualified class</span>
<span class="sd"> name for export).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">source</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span>
<div class="viewcode-block" id="MLWritable"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWritable.html#pyspark.ml.MLWritable">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">MLWritable</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Mixin for ML instances that provide :py:class:`MLWriter`.</span>
<span class="sd"> .. versionadded:: 2.0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="MLWritable.write"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWritable.html#pyspark.ml.MLWritable.write">[docs]</a> <span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">MLWriter</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns an MLWriter instance for this ML instance.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;MLWritable is not yet implemented for type: </span><span class="si">%r</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div>
<div class="viewcode-block" id="MLWritable.save"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWritable.html#pyspark.ml.MLWritable.save">[docs]</a> <span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Save this ML instance to the given path, a shortcut of &#39;write().save(path)&#39;.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">write</span><span class="p">()</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div></div>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">JavaMLWritable</span><span class="p">(</span><span class="n">MLWritable</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> (Private) Mixin for ML instances that provide :py:class:`JavaMLWriter`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">JavaMLWriter</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns an MLWriter instance for this ML instance.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">JavaMLWriter</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">GeneralJavaMLWritable</span><span class="p">(</span><span class="n">JavaMLWritable</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> (Private) Mixin for ML instances that provide :py:class:`GeneralJavaMLWriter`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">GeneralJavaMLWriter</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns a GeneralJavaMLWriter instance for this ML instance.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">GeneralJavaMLWriter</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<div class="viewcode-block" id="MLReader"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLReader.html#pyspark.ml.MLReader">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">MLReader</span><span class="p">(</span><span class="n">BaseReadWrite</span><span class="p">,</span> <span class="n">Generic</span><span class="p">[</span><span class="n">RL</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Utility class that can load ML instances.</span>
<span class="sd"> .. versionadded:: 2.0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="nb">super</span><span class="p">(</span><span class="n">MLReader</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<div class="viewcode-block" id="MLReader.load"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLReader.html#pyspark.ml.MLReader.load">[docs]</a> <span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">RL</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Load the ML instance from the input path.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;MLReader is not yet implemented for type: </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div></div>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">JavaMLReader</span><span class="p">(</span><span class="n">MLReader</span><span class="p">[</span><span class="n">RL</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> (Private) Specialization of :py:class:`MLReader` for :py:class:`JavaParams` types</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">clazz</span><span class="p">:</span> <span class="n">Type</span><span class="p">[</span><span class="s2">&quot;JavaMLReadable[RL]&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="nb">super</span><span class="p">(</span><span class="n">JavaMLReader</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_clazz</span> <span class="o">=</span> <span class="n">clazz</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jread</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_load_java_obj</span><span class="p">(</span><span class="n">clazz</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">RL</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Load the ML instance from the input path.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;path should be a string, got type </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">path</span><span class="p">))</span>
<span class="n">java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jread</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_clazz</span><span class="p">,</span> <span class="s2">&quot;_from_java&quot;</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span>
<span class="s2">&quot;This Java ML type cannot be loaded into Python currently: </span><span class="si">%r</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">_clazz</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_clazz</span><span class="o">.</span><span class="n">_from_java</span><span class="p">(</span><span class="n">java_obj</span><span class="p">)</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="k">def</span> <span class="nf">session</span><span class="p">(</span><span class="bp">self</span><span class="p">:</span> <span class="n">JR</span><span class="p">,</span> <span class="n">sparkSession</span><span class="p">:</span> <span class="n">SparkSession</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">JR</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Sets the Spark Session to use for loading.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jread</span><span class="o">.</span><span class="n">session</span><span class="p">(</span><span class="n">sparkSession</span><span class="o">.</span><span class="n">_jsparkSession</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">_java_loader_class</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">clazz</span><span class="p">:</span> <span class="n">Type</span><span class="p">[</span><span class="s2">&quot;JavaMLReadable[RL]&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the full class name of the Java ML instance. The default</span>
<span class="sd"> implementation replaces &quot;pyspark&quot; by &quot;org.apache.spark&quot; in</span>
<span class="sd"> the Python full class name.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">java_package</span> <span class="o">=</span> <span class="n">clazz</span><span class="o">.</span><span class="vm">__module__</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;pyspark&quot;</span><span class="p">,</span> <span class="s2">&quot;org.apache.spark&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">clazz</span><span class="o">.</span><span class="vm">__name__</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">&quot;Pipeline&quot;</span><span class="p">,</span> <span class="s2">&quot;PipelineModel&quot;</span><span class="p">):</span>
<span class="c1"># Remove the last package name &quot;pipeline&quot; for Pipeline and PipelineModel.</span>
<span class="n">java_package</span> <span class="o">=</span> <span class="s2">&quot;.&quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">java_package</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot;.&quot;</span><span class="p">)[</span><span class="mi">0</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
<span class="k">return</span> <span class="n">java_package</span> <span class="o">+</span> <span class="s2">&quot;.&quot;</span> <span class="o">+</span> <span class="n">clazz</span><span class="o">.</span><span class="vm">__name__</span>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">_load_java_obj</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">clazz</span><span class="p">:</span> <span class="n">Type</span><span class="p">[</span><span class="s2">&quot;JavaMLReadable[RL]&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="s2">&quot;JavaObject&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Load the peer Java object of the ML instance.&quot;&quot;&quot;</span>
<span class="n">java_class</span> <span class="o">=</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_java_loader_class</span><span class="p">(</span><span class="n">clazz</span><span class="p">)</span>
<span class="n">java_obj</span> <span class="o">=</span> <span class="n">_jvm</span><span class="p">()</span>
<span class="k">for</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">java_class</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot;.&quot;</span><span class="p">):</span>
<span class="n">java_obj</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">java_obj</span><span class="p">,</span> <span class="n">name</span><span class="p">)</span>
<span class="k">return</span> <span class="n">java_obj</span>
<div class="viewcode-block" id="MLReadable"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLReadable.html#pyspark.ml.MLReadable">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">MLReadable</span><span class="p">(</span><span class="n">Generic</span><span class="p">[</span><span class="n">RL</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Mixin for instances that provide :py:class:`MLReader`.</span>
<span class="sd"> .. versionadded:: 2.0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="MLReadable.read"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLReadable.html#pyspark.ml.MLReadable.read">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">MLReader</span><span class="p">[</span><span class="n">RL</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns an MLReader instance for this class.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;MLReadable.read() not implemented for type: </span><span class="si">%r</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="bp">cls</span><span class="p">)</span></div>
<div class="viewcode-block" id="MLReadable.load"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLReadable.html#pyspark.ml.MLReadable.load">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">RL</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Reads an ML instance from the input path, a shortcut of `read().load(path)`.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">cls</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div></div>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">JavaMLReadable</span><span class="p">(</span><span class="n">MLReadable</span><span class="p">[</span><span class="n">RL</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> (Private) Mixin for instances that provide JavaMLReader.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">JavaMLReader</span><span class="p">[</span><span class="n">RL</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns an MLReader instance for this class.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">JavaMLReader</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span>
<div class="viewcode-block" id="DefaultParamsWritable"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsWritable.html#pyspark.ml.DefaultParamsWritable">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">DefaultParamsWritable</span><span class="p">(</span><span class="n">MLWritable</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Helper trait for making simple :py:class:`Params` types writable. If a :py:class:`Params`</span>
<span class="sd"> class stores all data as :py:class:`Param` values, then extending this trait will provide</span>
<span class="sd"> a default implementation of writing saved instances of the class.</span>
<span class="sd"> This only handles simple :py:class:`Param` types; e.g., it will not handle</span>
<span class="sd"> :py:class:`pyspark.sql.DataFrame`. See :py:class:`DefaultParamsReadable`, the counterpart</span>
<span class="sd"> to this class.</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="DefaultParamsWritable.write"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsWritable.html#pyspark.ml.DefaultParamsWritable.write">[docs]</a> <span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">MLWriter</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns a DefaultParamsWriter instance for this class.&quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.param</span> <span class="kn">import</span> <span class="n">Params</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">Params</span><span class="p">):</span>
<span class="k">return</span> <span class="n">DefaultParamsWriter</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;Cannot use DefaultParamsWritable with type </span><span class="si">%s</span><span class="s2"> because it does not &quot;</span>
<span class="o">+</span> <span class="s2">&quot; extend Params.&quot;</span><span class="p">,</span>
<span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">),</span>
<span class="p">)</span></div></div>
<div class="viewcode-block" id="DefaultParamsWriter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsWriter.html#pyspark.ml.DefaultParamsWriter">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">DefaultParamsWriter</span><span class="p">(</span><span class="n">MLWriter</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Specialization of :py:class:`MLWriter` for :py:class:`Params` types.</span>
<span class="sd"> Class for writing Estimators and Transformers whose parameters are JSON-serializable.</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instance</span><span class="p">:</span> <span class="s2">&quot;Params&quot;</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">DefaultParamsWriter</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">instance</span> <span class="o">=</span> <span class="n">instance</span>
<div class="viewcode-block" id="DefaultParamsWriter.saveImpl"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsWriter.html#pyspark.ml.DefaultParamsWriter.saveImpl">[docs]</a> <span class="k">def</span> <span class="nf">saveImpl</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">DefaultParamsWriter</span><span class="o">.</span><span class="n">saveMetadata</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instance</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">sc</span><span class="p">)</span></div>
<div class="viewcode-block" id="DefaultParamsWriter.extractJsonParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsWriter.html#pyspark.ml.DefaultParamsWriter.extractJsonParams">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">extractJsonParams</span><span class="p">(</span><span class="n">instance</span><span class="p">:</span> <span class="s2">&quot;Params&quot;</span><span class="p">,</span> <span class="n">skipParams</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<span class="n">paramMap</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">extractParamMap</span><span class="p">()</span>
<span class="n">jsonParams</span> <span class="o">=</span> <span class="p">{</span>
<span class="n">param</span><span class="o">.</span><span class="n">name</span><span class="p">:</span> <span class="n">value</span> <span class="k">for</span> <span class="n">param</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">paramMap</span><span class="o">.</span><span class="n">items</span><span class="p">()</span> <span class="k">if</span> <span class="n">param</span><span class="o">.</span><span class="n">name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">skipParams</span>
<span class="p">}</span>
<span class="k">return</span> <span class="n">jsonParams</span></div>
<div class="viewcode-block" id="DefaultParamsWriter.saveMetadata"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsWriter.html#pyspark.ml.DefaultParamsWriter.saveMetadata">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">saveMetadata</span><span class="p">(</span>
<span class="n">instance</span><span class="p">:</span> <span class="s2">&quot;Params&quot;</span><span class="p">,</span>
<span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span>
<span class="n">extraMetadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">paramMap</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Saves metadata + Params to: path + &quot;/metadata&quot;</span>
<span class="sd"> - class</span>
<span class="sd"> - timestamp</span>
<span class="sd"> - sparkVersion</span>
<span class="sd"> - uid</span>
<span class="sd"> - paramMap</span>
<span class="sd"> - defaultParamMap (since 2.4.0)</span>
<span class="sd"> - (optionally, extra metadata)</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> extraMetadata : dict, optional</span>
<span class="sd"> Extra metadata to be saved at same level as uid, paramMap, etc.</span>
<span class="sd"> paramMap : dict, optional</span>
<span class="sd"> If given, this is saved in the &quot;paramMap&quot; field.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">metadataPath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s2">&quot;metadata&quot;</span><span class="p">)</span>
<span class="n">metadataJson</span> <span class="o">=</span> <span class="n">DefaultParamsWriter</span><span class="o">.</span><span class="n">_get_metadata_to_save</span><span class="p">(</span>
<span class="n">instance</span><span class="p">,</span> <span class="n">sc</span><span class="p">,</span> <span class="n">extraMetadata</span><span class="p">,</span> <span class="n">paramMap</span>
<span class="p">)</span>
<span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">([</span><span class="n">metadataJson</span><span class="p">],</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">saveAsTextFile</span><span class="p">(</span><span class="n">metadataPath</span><span class="p">)</span></div>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">_get_metadata_to_save</span><span class="p">(</span>
<span class="n">instance</span><span class="p">:</span> <span class="s2">&quot;Params&quot;</span><span class="p">,</span>
<span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span>
<span class="n">extraMetadata</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">paramMap</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Helper for :py:meth:`DefaultParamsWriter.saveMetadata` which extracts the JSON to save.</span>
<span class="sd"> This is useful for ensemble models which need to save metadata for many sub-models.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> See :py:meth:`DefaultParamsWriter.saveMetadata` for details on what this includes.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">uid</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">uid</span>
<span class="bp">cls</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="vm">__module__</span> <span class="o">+</span> <span class="s2">&quot;.&quot;</span> <span class="o">+</span> <span class="n">instance</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span>
<span class="c1"># User-supplied param values</span>
<span class="n">params</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">_paramMap</span>
<span class="n">jsonParams</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">if</span> <span class="n">paramMap</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">jsonParams</span> <span class="o">=</span> <span class="n">paramMap</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">params</span><span class="p">:</span>
<span class="n">jsonParams</span><span class="p">[</span><span class="n">p</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="n">p</span><span class="p">]</span>
<span class="c1"># Default param values</span>
<span class="n">jsonDefaultParams</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">instance</span><span class="o">.</span><span class="n">_defaultParamMap</span><span class="p">:</span>
<span class="n">jsonDefaultParams</span><span class="p">[</span><span class="n">p</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">_defaultParamMap</span><span class="p">[</span><span class="n">p</span><span class="p">]</span>
<span class="n">basicMetadata</span> <span class="o">=</span> <span class="p">{</span>
<span class="s2">&quot;class&quot;</span><span class="p">:</span> <span class="bp">cls</span><span class="p">,</span>
<span class="s2">&quot;timestamp&quot;</span><span class="p">:</span> <span class="nb">int</span><span class="p">(</span><span class="nb">round</span><span class="p">(</span><span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span> <span class="o">*</span> <span class="mi">1000</span><span class="p">)),</span>
<span class="s2">&quot;sparkVersion&quot;</span><span class="p">:</span> <span class="n">sc</span><span class="o">.</span><span class="n">version</span><span class="p">,</span>
<span class="s2">&quot;uid&quot;</span><span class="p">:</span> <span class="n">uid</span><span class="p">,</span>
<span class="s2">&quot;paramMap&quot;</span><span class="p">:</span> <span class="n">jsonParams</span><span class="p">,</span>
<span class="s2">&quot;defaultParamMap&quot;</span><span class="p">:</span> <span class="n">jsonDefaultParams</span><span class="p">,</span>
<span class="p">}</span>
<span class="k">if</span> <span class="n">extraMetadata</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">basicMetadata</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">extraMetadata</span><span class="p">)</span>
<span class="k">return</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">basicMetadata</span><span class="p">,</span> <span class="n">separators</span><span class="o">=</span><span class="p">(</span><span class="s2">&quot;,&quot;</span><span class="p">,</span> <span class="s2">&quot;:&quot;</span><span class="p">))</span></div>
<div class="viewcode-block" id="DefaultParamsReadable"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReadable.html#pyspark.ml.DefaultParamsReadable">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">DefaultParamsReadable</span><span class="p">(</span><span class="n">MLReadable</span><span class="p">[</span><span class="n">RL</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Helper trait for making simple :py:class:`Params` types readable.</span>
<span class="sd"> If a :py:class:`Params` class stores all data as :py:class:`Param` values,</span>
<span class="sd"> then extending this trait will provide a default implementation of reading saved</span>
<span class="sd"> instances of the class. This only handles simple :py:class:`Param` types;</span>
<span class="sd"> e.g., it will not handle :py:class:`pyspark.sql.DataFrame`. See</span>
<span class="sd"> :py:class:`DefaultParamsWritable`, the counterpart to this class.</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="DefaultParamsReadable.read"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReadable.html#pyspark.ml.DefaultParamsReadable.read">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;DefaultParamsReader[RL]&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Returns a DefaultParamsReader instance for this class.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">DefaultParamsReader</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="DefaultParamsReader"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReader.html#pyspark.ml.DefaultParamsReader">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">DefaultParamsReader</span><span class="p">(</span><span class="n">MLReader</span><span class="p">[</span><span class="n">RL</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Specialization of :py:class:`MLReader` for :py:class:`Params` types.</span>
<span class="sd"> Default :py:class:`MLReader` implementation for transformers and estimators that</span>
<span class="sd"> contain basic (json-serializable) params and no data. This will not handle</span>
<span class="sd"> more complex params or types with data (e.g., models with coefficients).</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="bp">cls</span><span class="p">:</span> <span class="n">Type</span><span class="p">[</span><span class="n">DefaultParamsReadable</span><span class="p">[</span><span class="n">RL</span><span class="p">]]):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">DefaultParamsReader</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">cls</span> <span class="o">=</span> <span class="bp">cls</span>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">__get_class</span><span class="p">(</span><span class="n">clazz</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Type</span><span class="p">[</span><span class="n">RL</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads Python class from its name.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">parts</span> <span class="o">=</span> <span class="n">clazz</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot;.&quot;</span><span class="p">)</span>
<span class="n">module</span> <span class="o">=</span> <span class="s2">&quot;.&quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">parts</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
<span class="n">m</span> <span class="o">=</span> <span class="nb">__import__</span><span class="p">(</span><span class="n">module</span><span class="p">,</span> <span class="n">fromlist</span><span class="o">=</span><span class="p">[</span><span class="n">parts</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]])</span>
<span class="k">return</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">parts</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
<div class="viewcode-block" id="DefaultParamsReader.load"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReader.html#pyspark.ml.DefaultParamsReader.load">[docs]</a> <span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">RL</span><span class="p">:</span>
<span class="n">metadata</span> <span class="o">=</span> <span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">loadMetadata</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">sc</span><span class="p">)</span>
<span class="n">py_type</span><span class="p">:</span> <span class="n">Type</span><span class="p">[</span><span class="n">RL</span><span class="p">]</span> <span class="o">=</span> <span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">__get_class</span><span class="p">(</span><span class="n">metadata</span><span class="p">[</span><span class="s2">&quot;class&quot;</span><span class="p">])</span>
<span class="n">instance</span> <span class="o">=</span> <span class="n">py_type</span><span class="p">()</span>
<span class="n">cast</span><span class="p">(</span><span class="s2">&quot;Params&quot;</span><span class="p">,</span> <span class="n">instance</span><span class="p">)</span><span class="o">.</span><span class="n">_resetUid</span><span class="p">(</span><span class="n">metadata</span><span class="p">[</span><span class="s2">&quot;uid&quot;</span><span class="p">])</span>
<span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">getAndSetParams</span><span class="p">(</span><span class="n">instance</span><span class="p">,</span> <span class="n">metadata</span><span class="p">)</span>
<span class="k">return</span> <span class="n">instance</span></div>
<div class="viewcode-block" id="DefaultParamsReader.loadMetadata"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReader.html#pyspark.ml.DefaultParamsReader.loadMetadata">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">loadMetadata</span><span class="p">(</span><span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">expectedClassName</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Load metadata saved using :py:meth:`DefaultParamsWriter.saveMetadata`</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str</span>
<span class="sd"> sc : :py:class:`pyspark.SparkContext`</span>
<span class="sd"> expectedClassName : str, optional</span>
<span class="sd"> If non empty, this is checked against the loaded metadata.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">metadataPath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s2">&quot;metadata&quot;</span><span class="p">)</span>
<span class="n">metadataStr</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">textFile</span><span class="p">(</span><span class="n">metadataPath</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
<span class="n">loadedVals</span> <span class="o">=</span> <span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">_parseMetaData</span><span class="p">(</span><span class="n">metadataStr</span><span class="p">,</span> <span class="n">expectedClassName</span><span class="p">)</span>
<span class="k">return</span> <span class="n">loadedVals</span></div>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">_parseMetaData</span><span class="p">(</span><span class="n">metadataStr</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">expectedClassName</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Parse metadata JSON string produced by :py:meth:`DefaultParamsWriter._get_metadata_to_save`.</span>
<span class="sd"> This is a helper function for :py:meth:`DefaultParamsReader.loadMetadata`.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> metadataStr : str</span>
<span class="sd"> JSON string of metadata</span>
<span class="sd"> expectedClassName : str, optional</span>
<span class="sd"> If non empty, this is checked against the loaded metadata.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">metadata</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">metadataStr</span><span class="p">)</span>
<span class="n">className</span> <span class="o">=</span> <span class="n">metadata</span><span class="p">[</span><span class="s2">&quot;class&quot;</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">expectedClassName</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">assert</span> <span class="n">className</span> <span class="o">==</span> <span class="n">expectedClassName</span><span class="p">,</span> <span class="p">(</span>
<span class="s2">&quot;Error loading metadata: Expected &quot;</span>
<span class="o">+</span> <span class="s2">&quot;class name </span><span class="si">{}</span><span class="s2"> but found class name </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">expectedClassName</span><span class="p">,</span> <span class="n">className</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">metadata</span>
<div class="viewcode-block" id="DefaultParamsReader.getAndSetParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReader.html#pyspark.ml.DefaultParamsReader.getAndSetParams">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">getAndSetParams</span><span class="p">(</span>
<span class="n">instance</span><span class="p">:</span> <span class="n">RL</span><span class="p">,</span> <span class="n">metadata</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">],</span> <span class="n">skipParams</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Extract Params from metadata, and set them in the instance.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Set user-supplied param values</span>
<span class="k">for</span> <span class="n">paramName</span> <span class="ow">in</span> <span class="n">metadata</span><span class="p">[</span><span class="s2">&quot;paramMap&quot;</span><span class="p">]:</span>
<span class="n">param</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="s2">&quot;Params&quot;</span><span class="p">,</span> <span class="n">instance</span><span class="p">)</span><span class="o">.</span><span class="n">getParam</span><span class="p">(</span><span class="n">paramName</span><span class="p">)</span>
<span class="k">if</span> <span class="n">skipParams</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">paramName</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">skipParams</span><span class="p">:</span>
<span class="n">paramValue</span> <span class="o">=</span> <span class="n">metadata</span><span class="p">[</span><span class="s2">&quot;paramMap&quot;</span><span class="p">][</span><span class="n">paramName</span><span class="p">]</span>
<span class="n">cast</span><span class="p">(</span><span class="s2">&quot;Params&quot;</span><span class="p">,</span> <span class="n">instance</span><span class="p">)</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">paramValue</span><span class="p">)</span>
<span class="c1"># Set default param values</span>
<span class="n">majorAndMinorVersions</span> <span class="o">=</span> <span class="n">VersionUtils</span><span class="o">.</span><span class="n">majorMinorVersion</span><span class="p">(</span><span class="n">metadata</span><span class="p">[</span><span class="s2">&quot;sparkVersion&quot;</span><span class="p">])</span>
<span class="n">major</span> <span class="o">=</span> <span class="n">majorAndMinorVersions</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">minor</span> <span class="o">=</span> <span class="n">majorAndMinorVersions</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="c1"># Metadata files written before Spark 2.4 have no defaultParamMap section.</span>
<span class="k">if</span> <span class="n">major</span> <span class="o">&gt;</span> <span class="mi">2</span> <span class="ow">or</span> <span class="p">(</span><span class="n">major</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">and</span> <span class="n">minor</span> <span class="o">&gt;=</span> <span class="mi">4</span><span class="p">):</span>
<span class="k">assert</span> <span class="s2">&quot;defaultParamMap&quot;</span> <span class="ow">in</span> <span class="n">metadata</span><span class="p">,</span> <span class="p">(</span>
<span class="s2">&quot;Error loading metadata: Expected &quot;</span> <span class="o">+</span> <span class="s2">&quot;`defaultParamMap` section not found&quot;</span>
<span class="p">)</span>
<span class="k">for</span> <span class="n">paramName</span> <span class="ow">in</span> <span class="n">metadata</span><span class="p">[</span><span class="s2">&quot;defaultParamMap&quot;</span><span class="p">]:</span>
<span class="n">paramValue</span> <span class="o">=</span> <span class="n">metadata</span><span class="p">[</span><span class="s2">&quot;defaultParamMap&quot;</span><span class="p">][</span><span class="n">paramName</span><span class="p">]</span>
<span class="n">cast</span><span class="p">(</span><span class="s2">&quot;Params&quot;</span><span class="p">,</span> <span class="n">instance</span><span class="p">)</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="o">**</span><span class="p">{</span><span class="n">paramName</span><span class="p">:</span> <span class="n">paramValue</span><span class="p">})</span></div>
<div class="viewcode-block" id="DefaultParamsReader.isPythonParamsInstance"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReader.html#pyspark.ml.DefaultParamsReader.isPythonParamsInstance">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">isPythonParamsInstance</span><span class="p">(</span><span class="n">metadata</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Any</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="k">return</span> <span class="n">metadata</span><span class="p">[</span><span class="s2">&quot;class&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">&quot;pyspark.ml.&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="DefaultParamsReader.loadParamsInstance"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReader.html#pyspark.ml.DefaultParamsReader.loadParamsInstance">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">loadParamsInstance</span><span class="p">(</span><span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">sc</span><span class="p">:</span> <span class="n">SparkContext</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">RL</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Load a :py:class:`Params` instance from the given path, and return it.</span>
<span class="sd"> This assumes the instance inherits from :py:class:`MLReadable`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">metadata</span> <span class="o">=</span> <span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">loadMetadata</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">sc</span><span class="p">)</span>
<span class="k">if</span> <span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">isPythonParamsInstance</span><span class="p">(</span><span class="n">metadata</span><span class="p">):</span>
<span class="n">pythonClassName</span> <span class="o">=</span> <span class="n">metadata</span><span class="p">[</span><span class="s2">&quot;class&quot;</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">pythonClassName</span> <span class="o">=</span> <span class="n">metadata</span><span class="p">[</span><span class="s2">&quot;class&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;org.apache.spark&quot;</span><span class="p">,</span> <span class="s2">&quot;pyspark&quot;</span><span class="p">)</span>
<span class="n">py_type</span><span class="p">:</span> <span class="n">Type</span><span class="p">[</span><span class="n">RL</span><span class="p">]</span> <span class="o">=</span> <span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">__get_class</span><span class="p">(</span><span class="n">pythonClassName</span><span class="p">)</span>
<span class="n">instance</span> <span class="o">=</span> <span class="n">py_type</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="k">return</span> <span class="n">instance</span></div></div>
<div class="viewcode-block" id="HasTrainingSummary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.HasTrainingSummary.html#pyspark.ml.HasTrainingSummary">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">HasTrainingSummary</span><span class="p">(</span><span class="n">Generic</span><span class="p">[</span><span class="n">T</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Base class for models that provide a training summary.</span>
<span class="sd"> .. versionadded:: 3.0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;2.1.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">hasSummary</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Indicates whether a training summary exists for this model</span>
<span class="sd"> instance.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="s2">&quot;JavaWrapper&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">&quot;hasSummary&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;2.1.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">summary</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">T</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gets summary of the model trained on the training set. An exception is thrown if</span>
<span class="sd"> no summary exists.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="s2">&quot;JavaWrapper&quot;</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">&quot;summary&quot;</span><span class="p">)</span></div>
<span class="k">class</span> <span class="nc">MetaAlgorithmReadWrite</span><span class="p">:</span>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">isMetaEstimator</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.ml</span> <span class="kn">import</span> <span class="n">Estimator</span><span class="p">,</span> <span class="n">Pipeline</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.tuning</span> <span class="kn">import</span> <span class="n">_ValidatorParams</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> <span class="kn">import</span> <span class="n">OneVsRest</span>
<span class="k">return</span> <span class="p">(</span>
<span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">Pipeline</span><span class="p">)</span>
<span class="ow">or</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">OneVsRest</span><span class="p">)</span>
<span class="ow">or</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">Estimator</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">_ValidatorParams</span><span class="p">))</span>
<span class="p">)</span>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">getAllNestedStages</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="s2">&quot;Params&quot;</span><span class="p">]:</span>
<span class="kn">from</span> <span class="nn">pyspark.ml</span> <span class="kn">import</span> <span class="n">Pipeline</span><span class="p">,</span> <span class="n">PipelineModel</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.tuning</span> <span class="kn">import</span> <span class="n">_ValidatorParams</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> <span class="kn">import</span> <span class="n">OneVsRest</span><span class="p">,</span> <span class="n">OneVsRestModel</span>
<span class="c1"># TODO: We need to handle `RFormulaModel.pipelineModel` here once the PySpark RFormulaModel</span>
<span class="c1"># supports the `pipelineModel` property.</span>
<span class="n">pySubStages</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;Params&quot;</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">Pipeline</span><span class="p">):</span>
<span class="n">pySubStages</span> <span class="o">=</span> <span class="n">pyInstance</span><span class="o">.</span><span class="n">getStages</span><span class="p">()</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">PipelineModel</span><span class="p">):</span>
<span class="n">pySubStages</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">List</span><span class="p">[</span><span class="s2">&quot;PipelineStage&quot;</span><span class="p">],</span> <span class="n">pyInstance</span><span class="o">.</span><span class="n">stages</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">_ValidatorParams</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;PySpark does not support nested validator.&quot;</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">OneVsRest</span><span class="p">):</span>
<span class="n">pySubStages</span> <span class="o">=</span> <span class="p">[</span><span class="n">pyInstance</span><span class="o">.</span><span class="n">getClassifier</span><span class="p">()]</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">OneVsRestModel</span><span class="p">):</span>
<span class="n">pySubStages</span> <span class="o">=</span> <span class="p">[</span><span class="n">pyInstance</span><span class="o">.</span><span class="n">getClassifier</span><span class="p">()]</span> <span class="o">+</span> <span class="n">pyInstance</span><span class="o">.</span><span class="n">models</span> <span class="c1"># type: ignore[operator]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">pySubStages</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">nestedStages</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">pySubStage</span> <span class="ow">in</span> <span class="n">pySubStages</span><span class="p">:</span>
<span class="n">nestedStages</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">MetaAlgorithmReadWrite</span><span class="o">.</span><span class="n">getAllNestedStages</span><span class="p">(</span><span class="n">pySubStage</span><span class="p">))</span>
<span class="k">return</span> <span class="p">[</span><span class="n">pyInstance</span><span class="p">]</span> <span class="o">+</span> <span class="n">nestedStages</span>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">getUidMap</span><span class="p">(</span><span class="n">instance</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="s2">&quot;Params&quot;</span><span class="p">]:</span>
<span class="n">nestedStages</span> <span class="o">=</span> <span class="n">MetaAlgorithmReadWrite</span><span class="o">.</span><span class="n">getAllNestedStages</span><span class="p">(</span><span class="n">instance</span><span class="p">)</span>
<span class="n">uidMap</span> <span class="o">=</span> <span class="p">{</span><span class="n">stage</span><span class="o">.</span><span class="n">uid</span><span class="p">:</span> <span class="n">stage</span> <span class="k">for</span> <span class="n">stage</span> <span class="ow">in</span> <span class="n">nestedStages</span><span class="p">}</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">nestedStages</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">len</span><span class="p">(</span><span class="n">uidMap</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">instance</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__module__</span><span class="si">}</span><span class="s2">.</span><span class="si">{</span><span class="n">instance</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="sa">f</span><span class="s2">&quot;.load found a compound estimator with stages with duplicate &quot;</span>
<span class="sa">f</span><span class="s2">&quot;UIDs. List of UIDs: </span><span class="si">{</span><span class="nb">list</span><span class="p">(</span><span class="n">uidMap</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span><span class="si">}</span><span class="s2">.&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">uidMap</span>
<span class="k">def</span> <span class="nf">try_remote_functions</span><span class="p">(</span><span class="n">f</span><span class="p">:</span> <span class="n">FuncT</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FuncT</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Mark API supported from Spark Connect.&quot;&quot;&quot;</span>
<span class="nd">@functools</span><span class="o">.</span><span class="n">wraps</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">wrapped</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="n">is_remote</span><span class="p">()</span> <span class="ow">and</span> <span class="s2">&quot;PYSPARK_NO_NAMESPACE_SHARE&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.connect</span> <span class="kn">import</span> <span class="n">functions</span>
<span class="k">return</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">functions</span><span class="p">,</span> <span class="n">f</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">f</span><span class="p">(</span><span class="o">*</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">FuncT</span><span class="p">,</span> <span class="n">wrapped</span><span class="p">)</span>
</pre></div>
</div>
<!-- Previous / next buttons -->
<div class="prev-next-area">
</div>
</main>
</div>
</div>
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<div class="footer-item">
<p class="copyright">
&copy; Copyright.<br>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br>
</p>
</div>
</div>
</footer>
</body>
</html>