<!-- blob: 4bdd540f7fea0c8f6b667c9506053325ffa804bb [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Character encoding is declared first, ahead of any text content. -->
  <meta charset="utf-8">
  <title>pyspark.ml.util &#8212; PySpark 3.2.2 documentation</title>

  <!-- Theme stylesheet, Font Awesome icons (with its two woff2 files
       preloaded) and the Open Sans / Lato web fonts. Source order is
       preserved so the cascade is unchanged. -->
  <link rel="stylesheet" href="../../../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css">
  <link rel="stylesheet" href="../../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
  <link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
  <link rel="stylesheet" href="../../../_static/vendor/open-sans_all/1.44.1/index.css">
  <link rel="stylesheet" href="../../../_static/vendor/lato_latin-ext/1.44.1/index.css">

  <!-- basic.css, Pygments highlighting colours, then the PySpark-specific
       sheet last. -->
  <link rel="stylesheet" href="../../../_static/basic.css">
  <link rel="stylesheet" href="../../../_static/pygments.css">
  <link rel="stylesheet" href="../../../_static/css/pyspark.css">

  <!-- Only a preload hint: the script tag that consumes this bundle is not
       part of this head. -->
  <link rel="preload" as="script" href="../../../_static/js/index.3da636dd464baa7582d2.js">

  <!-- Local scripts are left as blocking, in-order loads; the execution
       order is kept exactly as generated. -->
  <script id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
  <script src="../../../_static/jquery.js"></script>
  <script src="../../../_static/underscore.js"></script>
  <script src="../../../_static/doctools.js"></script>
  <script src="../../../_static/language_data.js"></script>
  <script src="../../../_static/copybutton.js"></script>

  <!-- Third-party script pinned with Subresource Integrity. -->
  <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>

  <!-- MathJax 2 picks up text/x-mathjax-config blocks when the library
       starts, so the configuration must appear before the loader script.
       The loader is async and could otherwise run before the parser
       reaches the configuration block. -->
  <script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
  <!-- NOTE(review): unlike require.js above, this CDN script has no
       integrity attribute. latest.js looks like a version-resolving
       loader; confirm whether a pinned, SRI-protected URL can be used. -->
  <script async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>

  <link rel="search" title="Search" href="../../../search.html">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta name="docsearch:language" content="en">
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<!-- Fixed top navigation bar. It carries its own accessible name so it can
     be told apart from the other nav landmarks on the page. -->
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main" aria-label="Site navigation">
  <div class="container-xl">
    <!-- The logo is the only content of this link, so its alt text names
         the link destination rather than describing the picture. -->
    <a class="navbar-brand" href="../../../index.html">
      <img src="../../../_static/spark-logo-reverse.png" class="logo" alt="PySpark documentation home">
    </a>
    <!-- Toggler for the collapsible menu below (targets #navbar-menu). -->
    <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation">
      <span class="navbar-toggler-icon"></span>
    </button>
    <div id="navbar-menu" class="col-lg-9 collapse navbar-collapse">
      <!-- Top-level documentation sections. -->
      <ul id="navbar-main-elements" class="navbar-nav mr-auto">
        <li class="nav-item">
          <a class="nav-link" href="../../../getting_started/index.html">Getting Started</a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="../../../user_guide/index.html">User Guide</a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="../../../reference/index.html">API Reference</a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="../../../development/index.html">Development</a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="../../../migration_guide/index.html">Migration Guide</a>
        </li>
      </ul>
      <!-- Second list is present but has no items on this page; kept so the
           generated structure is unchanged. -->
      <ul class="navbar-nav">
      </ul>
    </div>
  </div>
</nav>
<div class="container-xl">
<div class="row">
<!-- Left sidebar: search box followed by the documentation navigation. -->
<div class="col-12 col-md-3 bd-sidebar">
  <!-- Submits the query as the "q" parameter to search.html via GET. -->
  <form class="bd-search d-flex align-items-center" action="../../../search.html" method="get" role="search">
    <!-- Purely decorative icon; the input carries the accessible name. -->
    <i class="icon fas fa-search" aria-hidden="true"></i>
    <input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off">
  </form>
  <nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
    <div class="bd-toc-item active">
      <ul class="nav bd-sidenav">
      </ul>
    </div><!-- closes .bd-toc-item, which was previously left open before </nav> -->
  </nav>
</div>
<!-- Column holding the #bd-toc-nav element that the body's scroll-spy
     (data-target="#bd-toc-nav") points at. The nav is labelled so it is
     distinguishable from the other nav landmarks. -->
<div class="d-none d-xl-block col-xl-2 bd-toc">
  <nav id="bd-toc-nav" aria-label="Page contents">
    <ul class="nav section-nav flex-column">
    </ul>
  </nav>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<h1>Source code for pyspark.ml.util</h1><div class="highlight"><pre>
<span></span><span class="c1">#</span>
<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
<span class="c1"># this work for additional information regarding copyright ownership.</span>
<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
<span class="c1"># (the &quot;License&quot;); you may not use this file except in compliance with</span>
<span class="c1"># the License. You may obtain a copy of the License at</span>
<span class="c1">#</span>
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="c1">#</span>
<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
<span class="c1"># distributed under the License is distributed on an &quot;AS IS&quot; BASIS,</span>
<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
<span class="c1"># See the License for the specific language governing permissions and</span>
<span class="c1"># limitations under the License.</span>
<span class="c1">#</span>
<span class="kn">import</span> <span class="nn">json</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">time</span>
<span class="kn">import</span> <span class="nn">uuid</span>
<span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">SparkContext</span><span class="p">,</span> <span class="n">since</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.common</span> <span class="kn">import</span> <span class="n">inherit_doc</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span>
<span class="kn">from</span> <span class="nn">pyspark.util</span> <span class="kn">import</span> <span class="n">VersionUtils</span>
<span class="k">def</span> <span class="nf">_jvm</span><span class="p">():</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the JVM view associated with SparkContext. Must be called</span>
<span class="sd"> after SparkContext is initialized.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">jvm</span> <span class="o">=</span> <span class="n">SparkContext</span><span class="o">.</span><span class="n">_jvm</span>
<span class="k">if</span> <span class="n">jvm</span><span class="p">:</span>
<span class="k">return</span> <span class="n">jvm</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="s2">&quot;Cannot load _jvm from SparkContext. Is SparkContext initialized?&quot;</span><span class="p">)</span>
<div class="viewcode-block" id="Identifiable"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.Identifiable.html#pyspark.ml.Identifiable">[docs]</a><span class="k">class</span> <span class="nc">Identifiable</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Object with a unique ID.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="c1">#: A unique id for the object.</span>
<span class="bp">self</span><span class="o">.</span><span class="n">uid</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_randomUID</span><span class="p">()</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">uid</span>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">_randomUID</span><span class="p">(</span><span class="bp">cls</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generate a unique string id for the object. The default implementation</span>
<span class="sd"> concatenates the class name, &quot;_&quot;, and 12 random hex chars.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">str</span><span class="p">(</span><span class="bp">cls</span><span class="o">.</span><span class="vm">__name__</span> <span class="o">+</span> <span class="s2">&quot;_&quot;</span> <span class="o">+</span> <span class="n">uuid</span><span class="o">.</span><span class="n">uuid4</span><span class="p">()</span><span class="o">.</span><span class="n">hex</span><span class="p">[</span><span class="o">-</span><span class="mi">12</span><span class="p">:])</span></div>
<div class="viewcode-block" id="BaseReadWrite"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.BaseReadWrite.html#pyspark.ml.BaseReadWrite">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">BaseReadWrite</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Base class for MLWriter and MLReader. Stores information about the SparkContext</span>
<span class="sd"> and SparkSession.</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sparkSession</span> <span class="o">=</span> <span class="kc">None</span>
<div class="viewcode-block" id="BaseReadWrite.session"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.BaseReadWrite.html#pyspark.ml.BaseReadWrite.session">[docs]</a> <span class="k">def</span> <span class="nf">session</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sparkSession</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sets the Spark Session to use for saving/loading.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sparkSession</span> <span class="o">=</span> <span class="n">sparkSession</span>
<span class="k">return</span> <span class="bp">self</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">sparkSession</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the user-specified Spark Session or the default.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sparkSession</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_sparkSession</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sparkSession</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">sc</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the underlying `SparkContext`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">sparkSession</span><span class="o">.</span><span class="n">sparkContext</span></div>
<div class="viewcode-block" id="MLWriter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWriter.html#pyspark.ml.MLWriter">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">MLWriter</span><span class="p">(</span><span class="n">BaseReadWrite</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Utility class that can save ML instances.</span>
<span class="sd"> .. versionadded:: 2.0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">MLWriter</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">shouldOverwrite</span> <span class="o">=</span> <span class="kc">False</span>
<span class="bp">self</span><span class="o">.</span><span class="n">optionMap</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">def</span> <span class="nf">_handleOverwrite</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.wrapper</span> <span class="kn">import</span> <span class="n">JavaWrapper</span>
<span class="n">_java_obj</span> <span class="o">=</span> <span class="n">JavaWrapper</span><span class="o">.</span><span class="n">_new_java_obj</span><span class="p">(</span><span class="s2">&quot;org.apache.spark.ml.util.FileSystemOverwrite&quot;</span><span class="p">)</span>
<span class="n">wrapper</span> <span class="o">=</span> <span class="n">JavaWrapper</span><span class="p">(</span><span class="n">_java_obj</span><span class="p">)</span>
<span class="n">wrapper</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">&quot;handleOverwrite&quot;</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">sparkSession</span><span class="o">.</span><span class="n">_jsparkSession</span><span class="p">)</span>
<div class="viewcode-block" id="MLWriter.save"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWriter.html#pyspark.ml.MLWriter.save">[docs]</a> <span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Save the ML instance to the input path.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">shouldOverwrite</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_handleOverwrite</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">saveImpl</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div>
<div class="viewcode-block" id="MLWriter.saveImpl"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWriter.html#pyspark.ml.MLWriter.saveImpl">[docs]</a> <span class="k">def</span> <span class="nf">saveImpl</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> save() handles overwriting and then calls this method. Subclasses should override this</span>
<span class="sd"> method to implement the actual saving of the instance.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;MLWriter is not yet implemented for type: </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div>
<div class="viewcode-block" id="MLWriter.overwrite"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWriter.html#pyspark.ml.MLWriter.overwrite">[docs]</a> <span class="k">def</span> <span class="nf">overwrite</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Overwrites if the output path already exists.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">shouldOverwrite</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="MLWriter.option"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWriter.html#pyspark.ml.MLWriter.option">[docs]</a> <span class="k">def</span> <span class="nf">option</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Adds an option to the underlying MLWriter. See the documentation for the specific model&#39;s</span>
<span class="sd"> writer for possible options. The option name (key) is case-insensitive.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">optionMap</span><span class="p">[</span><span class="n">key</span><span class="o">.</span><span class="n">lower</span><span class="p">()]</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span></div></div>
<div class="viewcode-block" id="GeneralMLWriter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.GeneralMLWriter.html#pyspark.ml.GeneralMLWriter">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">GeneralMLWriter</span><span class="p">(</span><span class="n">MLWriter</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Utility class that can save ML instances in different formats.</span>
<span class="sd"> .. versionadded:: 2.4.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="GeneralMLWriter.format"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.GeneralMLWriter.html#pyspark.ml.GeneralMLWriter.format">[docs]</a> <span class="k">def</span> <span class="nf">format</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Specifies the format of ML export (&quot;pmml&quot;, &quot;internal&quot;, or the fully qualified class</span>
<span class="sd"> name for export).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">source</span> <span class="o">=</span> <span class="n">source</span>
<span class="k">return</span> <span class="bp">self</span></div></div>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">JavaMLWriter</span><span class="p">(</span><span class="n">MLWriter</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> (Private) Specialization of :py:class:`MLWriter` for :py:class:`JavaParams` types</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instance</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">JavaMLWriter</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="n">_java_obj</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">_to_java</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span> <span class="o">=</span> <span class="n">_java_obj</span><span class="o">.</span><span class="n">write</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Save the ML instance to the input path.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;path should be a string, got type </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">path</span><span class="p">))</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">overwrite</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Overwrites if the output path already exists.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">overwrite</span><span class="p">()</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="k">def</span> <span class="nf">option</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">option</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="k">def</span> <span class="nf">session</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sparkSession</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Sets the Spark Session to use for saving.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">session</span><span class="p">(</span><span class="n">sparkSession</span><span class="o">.</span><span class="n">_jsparkSession</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">GeneralJavaMLWriter</span><span class="p">(</span><span class="n">JavaMLWriter</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> (Private) Specialization of :py:class:`GeneralMLWriter` for :py:class:`JavaParams` types</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instance</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">GeneralJavaMLWriter</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">instance</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">format</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">source</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Specifies the format of ML export (&quot;pmml&quot;, &quot;internal&quot;, or the fully qualified class</span>
<span class="sd"> name for export).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jwrite</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">source</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span>
<div class="viewcode-block" id="MLWritable"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWritable.html#pyspark.ml.MLWritable">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">MLWritable</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Mixin for ML instances that provide :py:class:`MLWriter`.</span>
<span class="sd"> .. versionadded:: 2.0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="MLWritable.write"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWritable.html#pyspark.ml.MLWritable.write">[docs]</a> <span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Returns an MLWriter instance for this ML instance.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;MLWritable is not yet implemented for type: </span><span class="si">%r</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div>
<div class="viewcode-block" id="MLWritable.save"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLWritable.html#pyspark.ml.MLWritable.save">[docs]</a> <span class="k">def</span> <span class="nf">save</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Save this ML instance to the given path, a shortcut of &#39;write().save(path)&#39;.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">write</span><span class="p">()</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div></div>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">JavaMLWritable</span><span class="p">(</span><span class="n">MLWritable</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> (Private) Mixin for ML instances that provide :py:class:`JavaMLWriter`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Returns an MLWriter instance for this ML instance.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">JavaMLWriter</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">GeneralJavaMLWritable</span><span class="p">(</span><span class="n">JavaMLWritable</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> (Private) Mixin for ML instances that provide :py:class:`GeneralJavaMLWriter`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Returns an GeneralMLWriter instance for this ML instance.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">GeneralJavaMLWriter</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<div class="viewcode-block" id="MLReader"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLReader.html#pyspark.ml.MLReader">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">MLReader</span><span class="p">(</span><span class="n">BaseReadWrite</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Utility class that can load ML instances.</span>
<span class="sd"> .. versionadded:: 2.0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">MLReader</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<div class="viewcode-block" id="MLReader.load"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLReader.html#pyspark.ml.MLReader.load">[docs]</a> <span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Load the ML instance from the input path.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;MLReader is not yet implemented for type: </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div></div>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">JavaMLReader</span><span class="p">(</span><span class="n">MLReader</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> (Private) Specialization of :py:class:`MLReader` for :py:class:`JavaParams` types</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">clazz</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">JavaMLReader</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_clazz</span> <span class="o">=</span> <span class="n">clazz</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jread</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_load_java_obj</span><span class="p">(</span><span class="n">clazz</span><span class="p">)</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Load the ML instance from the input path.&quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;path should be a string, got type </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">path</span><span class="p">))</span>
<span class="n">java_obj</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_jread</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_clazz</span><span class="p">,</span> <span class="s2">&quot;_from_java&quot;</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;This Java ML type cannot be loaded into Python currently: </span><span class="si">%r</span><span class="s2">&quot;</span>
<span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">_clazz</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_clazz</span><span class="o">.</span><span class="n">_from_java</span><span class="p">(</span><span class="n">java_obj</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">session</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">sparkSession</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Sets the Spark Session to use for loading.&quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_jread</span><span class="o">.</span><span class="n">session</span><span class="p">(</span><span class="n">sparkSession</span><span class="o">.</span><span class="n">_jsparkSession</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">_java_loader_class</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">clazz</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the full class name of the Java ML instance. The default</span>
<span class="sd"> implementation replaces &quot;pyspark&quot; with &quot;org.apache.spark&quot; in</span>
<span class="sd"> the Python full class name.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">java_package</span> <span class="o">=</span> <span class="n">clazz</span><span class="o">.</span><span class="vm">__module__</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;pyspark&quot;</span><span class="p">,</span> <span class="s2">&quot;org.apache.spark&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">clazz</span><span class="o">.</span><span class="vm">__name__</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">&quot;Pipeline&quot;</span><span class="p">,</span> <span class="s2">&quot;PipelineModel&quot;</span><span class="p">):</span>
<span class="c1"># Remove the last package name &quot;pipeline&quot; for Pipeline and PipelineModel.</span>
<span class="n">java_package</span> <span class="o">=</span> <span class="s2">&quot;.&quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">java_package</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot;.&quot;</span><span class="p">)[</span><span class="mi">0</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
<span class="k">return</span> <span class="n">java_package</span> <span class="o">+</span> <span class="s2">&quot;.&quot;</span> <span class="o">+</span> <span class="n">clazz</span><span class="o">.</span><span class="vm">__name__</span>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">_load_java_obj</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">clazz</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Load the peer Java object of the ML instance.&quot;&quot;&quot;</span>
<span class="n">java_class</span> <span class="o">=</span> <span class="bp">cls</span><span class="o">.</span><span class="n">_java_loader_class</span><span class="p">(</span><span class="n">clazz</span><span class="p">)</span>
<span class="n">java_obj</span> <span class="o">=</span> <span class="n">_jvm</span><span class="p">()</span>
<span class="k">for</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">java_class</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot;.&quot;</span><span class="p">):</span>
<span class="n">java_obj</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">java_obj</span><span class="p">,</span> <span class="n">name</span><span class="p">)</span>
<span class="k">return</span> <span class="n">java_obj</span>
<div class="viewcode-block" id="MLReadable"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLReadable.html#pyspark.ml.MLReadable">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">MLReadable</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Mixin for instances that provide :py:class:`MLReader`.</span>
<span class="sd"> .. versionadded:: 2.0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="MLReadable.read"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLReadable.html#pyspark.ml.MLReadable.read">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">cls</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Returns an MLReader instance for this class.&quot;&quot;&quot;</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;MLReadable.read() not implemented for type: </span><span class="si">%r</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="bp">cls</span><span class="p">)</span></div>
<div class="viewcode-block" id="MLReadable.load"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.MLReadable.html#pyspark.ml.MLReadable.load">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Reads an ML instance from the input path, a shortcut for `read().load(path)`.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">cls</span><span class="o">.</span><span class="n">read</span><span class="p">()</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">path</span><span class="p">)</span></div></div>
<span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">JavaMLReadable</span><span class="p">(</span><span class="n">MLReadable</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> (Private) Mixin for instances that provide JavaMLReader.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">cls</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Returns an MLReader instance for this class.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">JavaMLReader</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span>
<div class="viewcode-block" id="DefaultParamsWritable"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsWritable.html#pyspark.ml.DefaultParamsWritable">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">DefaultParamsWritable</span><span class="p">(</span><span class="n">MLWritable</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Helper trait for making simple :py:class:`Params` types writable. If a :py:class:`Params`</span>
<span class="sd"> class stores all data as :py:class:`Param` values, then extending this trait will provide</span>
<span class="sd"> a default implementation of writing saved instances of the class.</span>
<span class="sd"> This only handles simple :py:class:`Param` types; e.g., it will not handle</span>
<span class="sd"> :py:class:`pyspark.sql.DataFrame`. See :py:class:`DefaultParamsReadable`, the counterpart</span>
<span class="sd"> to this class.</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="DefaultParamsWritable.write"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsWritable.html#pyspark.ml.DefaultParamsWritable.write">[docs]</a> <span class="k">def</span> <span class="nf">write</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Returns a DefaultParamsWriter instance for this class.&quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.param</span> <span class="kn">import</span> <span class="n">Params</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">Params</span><span class="p">):</span>
<span class="k">return</span> <span class="n">DefaultParamsWriter</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;Cannot use DefautParamsWritable with type </span><span class="si">%s</span><span class="s2"> because it does not &quot;</span> <span class="o">+</span>
<span class="s2">&quot; extend Params.&quot;</span><span class="p">,</span> <span class="nb">type</span><span class="p">(</span><span class="bp">self</span><span class="p">))</span></div></div>
<div class="viewcode-block" id="DefaultParamsWriter"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsWriter.html#pyspark.ml.DefaultParamsWriter">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">DefaultParamsWriter</span><span class="p">(</span><span class="n">MLWriter</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Specialization of :py:class:`MLWriter` for :py:class:`Params` types.</span>
<span class="sd"> Class for writing Estimators and Transformers whose parameters are JSON-serializable.</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">instance</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">DefaultParamsWriter</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">instance</span> <span class="o">=</span> <span class="n">instance</span>
<div class="viewcode-block" id="DefaultParamsWriter.saveImpl"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsWriter.html#pyspark.ml.DefaultParamsWriter.saveImpl">[docs]</a> <span class="k">def</span> <span class="nf">saveImpl</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="n">DefaultParamsWriter</span><span class="o">.</span><span class="n">saveMetadata</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">instance</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">sc</span><span class="p">)</span></div>
<div class="viewcode-block" id="DefaultParamsWriter.extractJsonParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsWriter.html#pyspark.ml.DefaultParamsWriter.extractJsonParams">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">extractJsonParams</span><span class="p">(</span><span class="n">instance</span><span class="p">,</span> <span class="n">skipParams</span><span class="p">):</span>
<span class="n">paramMap</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">extractParamMap</span><span class="p">()</span>
<span class="n">jsonParams</span> <span class="o">=</span> <span class="p">{</span><span class="n">param</span><span class="o">.</span><span class="n">name</span><span class="p">:</span> <span class="n">value</span> <span class="k">for</span> <span class="n">param</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">paramMap</span><span class="o">.</span><span class="n">items</span><span class="p">()</span>
<span class="k">if</span> <span class="n">param</span><span class="o">.</span><span class="n">name</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">skipParams</span><span class="p">}</span>
<span class="k">return</span> <span class="n">jsonParams</span></div>
<div class="viewcode-block" id="DefaultParamsWriter.saveMetadata"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsWriter.html#pyspark.ml.DefaultParamsWriter.saveMetadata">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">saveMetadata</span><span class="p">(</span><span class="n">instance</span><span class="p">,</span> <span class="n">path</span><span class="p">,</span> <span class="n">sc</span><span class="p">,</span> <span class="n">extraMetadata</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">paramMap</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Saves metadata + Params to: path + &quot;/metadata&quot;</span>
<span class="sd"> - class</span>
<span class="sd"> - timestamp</span>
<span class="sd"> - sparkVersion</span>
<span class="sd"> - uid</span>
<span class="sd"> - paramMap</span>
<span class="sd"> - defaultParamMap (since 2.4.0)</span>
<span class="sd"> - (optionally, extra metadata)</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> extraMetadata : dict, optional</span>
<span class="sd"> Extra metadata to be saved at same level as uid, paramMap, etc.</span>
<span class="sd"> paramMap : dict, optional</span>
<span class="sd"> If given, this is saved in the &quot;paramMap&quot; field.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">metadataPath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s2">&quot;metadata&quot;</span><span class="p">)</span>
<span class="n">metadataJson</span> <span class="o">=</span> <span class="n">DefaultParamsWriter</span><span class="o">.</span><span class="n">_get_metadata_to_save</span><span class="p">(</span><span class="n">instance</span><span class="p">,</span>
<span class="n">sc</span><span class="p">,</span>
<span class="n">extraMetadata</span><span class="p">,</span>
<span class="n">paramMap</span><span class="p">)</span>
<span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">([</span><span class="n">metadataJson</span><span class="p">],</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">saveAsTextFile</span><span class="p">(</span><span class="n">metadataPath</span><span class="p">)</span></div>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">_get_metadata_to_save</span><span class="p">(</span><span class="n">instance</span><span class="p">,</span> <span class="n">sc</span><span class="p">,</span> <span class="n">extraMetadata</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">paramMap</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Helper for :py:meth:`DefaultParamsWriter.saveMetadata` which extracts the JSON to save.</span>
<span class="sd"> This is useful for ensemble models which need to save metadata for many sub-models.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> See :py:meth:`DefaultParamsWriter.saveMetadata` for details on what this includes.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">uid</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">uid</span>
<span class="bp">cls</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="vm">__module__</span> <span class="o">+</span> <span class="s1">&#39;.&#39;</span> <span class="o">+</span> <span class="n">instance</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span>
<span class="c1"># User-supplied param values</span>
<span class="n">params</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">_paramMap</span>
<span class="n">jsonParams</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">if</span> <span class="n">paramMap</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">jsonParams</span> <span class="o">=</span> <span class="n">paramMap</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">params</span><span class="p">:</span>
<span class="n">jsonParams</span><span class="p">[</span><span class="n">p</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="n">p</span><span class="p">]</span>
<span class="c1"># Default param values</span>
<span class="n">jsonDefaultParams</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">instance</span><span class="o">.</span><span class="n">_defaultParamMap</span><span class="p">:</span>
<span class="n">jsonDefaultParams</span><span class="p">[</span><span class="n">p</span><span class="o">.</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">_defaultParamMap</span><span class="p">[</span><span class="n">p</span><span class="p">]</span>
<span class="n">basicMetadata</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;class&quot;</span><span class="p">:</span> <span class="bp">cls</span><span class="p">,</span> <span class="s2">&quot;timestamp&quot;</span><span class="p">:</span> <span class="nb">int</span><span class="p">(</span><span class="nb">round</span><span class="p">(</span><span class="n">time</span><span class="o">.</span><span class="n">time</span><span class="p">()</span> <span class="o">*</span> <span class="mi">1000</span><span class="p">)),</span>
<span class="s2">&quot;sparkVersion&quot;</span><span class="p">:</span> <span class="n">sc</span><span class="o">.</span><span class="n">version</span><span class="p">,</span> <span class="s2">&quot;uid&quot;</span><span class="p">:</span> <span class="n">uid</span><span class="p">,</span> <span class="s2">&quot;paramMap&quot;</span><span class="p">:</span> <span class="n">jsonParams</span><span class="p">,</span>
<span class="s2">&quot;defaultParamMap&quot;</span><span class="p">:</span> <span class="n">jsonDefaultParams</span><span class="p">}</span>
<span class="k">if</span> <span class="n">extraMetadata</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">basicMetadata</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">extraMetadata</span><span class="p">)</span>
<span class="k">return</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">basicMetadata</span><span class="p">,</span> <span class="n">separators</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;,&#39;</span><span class="p">,</span> <span class="s1">&#39;:&#39;</span><span class="p">])</span></div>
<div class="viewcode-block" id="DefaultParamsReadable"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReadable.html#pyspark.ml.DefaultParamsReadable">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">DefaultParamsReadable</span><span class="p">(</span><span class="n">MLReadable</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Helper trait for making simple :py:class:`Params` types readable.</span>
<span class="sd"> If a :py:class:`Params` class stores all data as :py:class:`Param` values,</span>
<span class="sd"> then extending this trait will provide a default implementation of reading saved</span>
<span class="sd"> instances of the class. This only handles simple :py:class:`Param` types;</span>
<span class="sd"> e.g., it will not handle :py:class:`pyspark.sql.DataFrame`. See</span>
<span class="sd"> :py:class:`DefaultParamsWritable`, the counterpart to this class.</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<div class="viewcode-block" id="DefaultParamsReadable.read"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReadable.html#pyspark.ml.DefaultParamsReadable.read">[docs]</a> <span class="nd">@classmethod</span>
<span class="k">def</span> <span class="nf">read</span><span class="p">(</span><span class="bp">cls</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;Returns a DefaultParamsReader instance for this class.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">DefaultParamsReader</span><span class="p">(</span><span class="bp">cls</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="DefaultParamsReader"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReader.html#pyspark.ml.DefaultParamsReader">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">DefaultParamsReader</span><span class="p">(</span><span class="n">MLReader</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Specialization of :py:class:`MLReader` for :py:class:`Params` types.</span>
<span class="sd"> Default :py:class:`MLReader` implementation for transformers and estimators that</span>
<span class="sd"> contain basic (json-serializable) params and no data. This will not handle</span>
<span class="sd"> more complex params or types with data (e.g., models with coefficients).</span>
<span class="sd"> .. versionadded:: 2.3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="bp">cls</span><span class="p">):</span>
<span class="nb">super</span><span class="p">(</span><span class="n">DefaultParamsReader</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">cls</span> <span class="o">=</span> <span class="bp">cls</span>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">__get_class</span><span class="p">(</span><span class="n">clazz</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads Python class from its name.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">parts</span> <span class="o">=</span> <span class="n">clazz</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s1">&#39;.&#39;</span><span class="p">)</span>
<span class="n">module</span> <span class="o">=</span> <span class="s2">&quot;.&quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">parts</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>
<span class="n">m</span> <span class="o">=</span> <span class="nb">__import__</span><span class="p">(</span><span class="n">module</span><span class="p">)</span>
<span class="k">for</span> <span class="n">comp</span> <span class="ow">in</span> <span class="n">parts</span><span class="p">[</span><span class="mi">1</span><span class="p">:]:</span>
<span class="n">m</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">m</span><span class="p">,</span> <span class="n">comp</span><span class="p">)</span>
<span class="k">return</span> <span class="n">m</span>
<div class="viewcode-block" id="DefaultParamsReader.load"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReader.html#pyspark.ml.DefaultParamsReader.load">[docs]</a> <span class="k">def</span> <span class="nf">load</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">path</span><span class="p">):</span>
<span class="n">metadata</span> <span class="o">=</span> <span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">loadMetadata</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">sc</span><span class="p">)</span>
<span class="n">py_type</span> <span class="o">=</span> <span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">__get_class</span><span class="p">(</span><span class="n">metadata</span><span class="p">[</span><span class="s1">&#39;class&#39;</span><span class="p">])</span>
<span class="n">instance</span> <span class="o">=</span> <span class="n">py_type</span><span class="p">()</span>
<span class="n">instance</span><span class="o">.</span><span class="n">_resetUid</span><span class="p">(</span><span class="n">metadata</span><span class="p">[</span><span class="s1">&#39;uid&#39;</span><span class="p">])</span>
<span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">getAndSetParams</span><span class="p">(</span><span class="n">instance</span><span class="p">,</span> <span class="n">metadata</span><span class="p">)</span>
<span class="k">return</span> <span class="n">instance</span></div>
<div class="viewcode-block" id="DefaultParamsReader.loadMetadata"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReader.html#pyspark.ml.DefaultParamsReader.loadMetadata">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">loadMetadata</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">sc</span><span class="p">,</span> <span class="n">expectedClassName</span><span class="o">=</span><span class="s2">&quot;&quot;</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Load metadata saved using :py:meth:`DefaultParamsWriter.saveMetadata`</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> path : str</span>
<span class="sd"> sc : :py:class:`pyspark.SparkContext`</span>
<span class="sd"> expectedClassName : str, optional</span>
<span class="sd"> If non-empty, this is checked against the loaded metadata.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">metadataPath</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="s2">&quot;metadata&quot;</span><span class="p">)</span>
<span class="n">metadataStr</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">textFile</span><span class="p">(</span><span class="n">metadataPath</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">first</span><span class="p">()</span>
<span class="n">loadedVals</span> <span class="o">=</span> <span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">_parseMetaData</span><span class="p">(</span><span class="n">metadataStr</span><span class="p">,</span> <span class="n">expectedClassName</span><span class="p">)</span>
<span class="k">return</span> <span class="n">loadedVals</span></div>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">_parseMetaData</span><span class="p">(</span><span class="n">metadataStr</span><span class="p">,</span> <span class="n">expectedClassName</span><span class="o">=</span><span class="s2">&quot;&quot;</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Parse metadata JSON string produced by :py:meth:`DefaultParamsWriter._get_metadata_to_save`.</span>
<span class="sd"> This is a helper function for :py:meth:`DefaultParamsReader.loadMetadata`.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> metadataStr : str</span>
<span class="sd"> JSON string of metadata</span>
<span class="sd"> expectedClassName : str, optional</span>
<span class="sd"> If non-empty, this is checked against the loaded metadata.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">metadata</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">metadataStr</span><span class="p">)</span>
<span class="n">className</span> <span class="o">=</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">&#39;class&#39;</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">expectedClassName</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">assert</span> <span class="n">className</span> <span class="o">==</span> <span class="n">expectedClassName</span><span class="p">,</span> <span class="s2">&quot;Error loading metadata: Expected &quot;</span> <span class="o">+</span> \
<span class="s2">&quot;class name </span><span class="si">{}</span><span class="s2"> but found class name </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">expectedClassName</span><span class="p">,</span> <span class="n">className</span><span class="p">)</span>
<span class="k">return</span> <span class="n">metadata</span>
<div class="viewcode-block" id="DefaultParamsReader.getAndSetParams"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReader.html#pyspark.ml.DefaultParamsReader.getAndSetParams">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">getAndSetParams</span><span class="p">(</span><span class="n">instance</span><span class="p">,</span> <span class="n">metadata</span><span class="p">,</span> <span class="n">skipParams</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Extract Params from metadata, and set them in the instance.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Set user-supplied param values</span>
<span class="k">for</span> <span class="n">paramName</span> <span class="ow">in</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">&#39;paramMap&#39;</span><span class="p">]:</span>
<span class="n">param</span> <span class="o">=</span> <span class="n">instance</span><span class="o">.</span><span class="n">getParam</span><span class="p">(</span><span class="n">paramName</span><span class="p">)</span>
<span class="k">if</span> <span class="n">skipParams</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">paramName</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">skipParams</span><span class="p">:</span>
<span class="n">paramValue</span> <span class="o">=</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">&#39;paramMap&#39;</span><span class="p">][</span><span class="n">paramName</span><span class="p">]</span>
<span class="n">instance</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">param</span><span class="p">,</span> <span class="n">paramValue</span><span class="p">)</span>
<span class="c1"># Set default param values</span>
<span class="n">majorAndMinorVersions</span> <span class="o">=</span> <span class="n">VersionUtils</span><span class="o">.</span><span class="n">majorMinorVersion</span><span class="p">(</span><span class="n">metadata</span><span class="p">[</span><span class="s1">&#39;sparkVersion&#39;</span><span class="p">])</span>
<span class="n">major</span> <span class="o">=</span> <span class="n">majorAndMinorVersions</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">minor</span> <span class="o">=</span> <span class="n">majorAndMinorVersions</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="c1"># For metadata file prior to Spark 2.4, there is no default section.</span>
<span class="k">if</span> <span class="n">major</span> <span class="o">&gt;</span> <span class="mi">2</span> <span class="ow">or</span> <span class="p">(</span><span class="n">major</span> <span class="o">==</span> <span class="mi">2</span> <span class="ow">and</span> <span class="n">minor</span> <span class="o">&gt;=</span> <span class="mi">4</span><span class="p">):</span>
<span class="k">assert</span> <span class="s1">&#39;defaultParamMap&#39;</span> <span class="ow">in</span> <span class="n">metadata</span><span class="p">,</span> <span class="s2">&quot;Error loading metadata: Expected &quot;</span> <span class="o">+</span> \
<span class="s2">&quot;`defaultParamMap` section not found&quot;</span>
<span class="k">for</span> <span class="n">paramName</span> <span class="ow">in</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">&#39;defaultParamMap&#39;</span><span class="p">]:</span>
<span class="n">paramValue</span> <span class="o">=</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">&#39;defaultParamMap&#39;</span><span class="p">][</span><span class="n">paramName</span><span class="p">]</span>
<span class="n">instance</span><span class="o">.</span><span class="n">_setDefault</span><span class="p">(</span><span class="o">**</span><span class="p">{</span><span class="n">paramName</span><span class="p">:</span> <span class="n">paramValue</span><span class="p">})</span></div>
<div class="viewcode-block" id="DefaultParamsReader.isPythonParamsInstance"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReader.html#pyspark.ml.DefaultParamsReader.isPythonParamsInstance">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">isPythonParamsInstance</span><span class="p">(</span><span class="n">metadata</span><span class="p">):</span>
<span class="k">return</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">&#39;class&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">&#39;pyspark.ml.&#39;</span><span class="p">)</span></div>
<div class="viewcode-block" id="DefaultParamsReader.loadParamsInstance"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.DefaultParamsReader.html#pyspark.ml.DefaultParamsReader.loadParamsInstance">[docs]</a> <span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">loadParamsInstance</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">sc</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Load a :py:class:`Params` instance from the given path, and return it.</span>
<span class="sd"> This assumes the instance inherits from :py:class:`MLReadable`.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">metadata</span> <span class="o">=</span> <span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">loadMetadata</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">sc</span><span class="p">)</span>
<span class="k">if</span> <span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">isPythonParamsInstance</span><span class="p">(</span><span class="n">metadata</span><span class="p">):</span>
<span class="n">pythonClassName</span> <span class="o">=</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">&#39;class&#39;</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">pythonClassName</span> <span class="o">=</span> <span class="n">metadata</span><span class="p">[</span><span class="s1">&#39;class&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;org.apache.spark&quot;</span><span class="p">,</span> <span class="s2">&quot;pyspark&quot;</span><span class="p">)</span>
<span class="n">py_type</span> <span class="o">=</span> <span class="n">DefaultParamsReader</span><span class="o">.</span><span class="n">__get_class</span><span class="p">(</span><span class="n">pythonClassName</span><span class="p">)</span>
<span class="n">instance</span> <span class="o">=</span> <span class="n">py_type</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="k">return</span> <span class="n">instance</span></div></div>
<div class="viewcode-block" id="HasTrainingSummary"><a class="viewcode-back" href="../../../reference/api/pyspark.ml.util.HasTrainingSummary.html#pyspark.ml.HasTrainingSummary">[docs]</a><span class="nd">@inherit_doc</span>
<span class="k">class</span> <span class="nc">HasTrainingSummary</span><span class="p">(</span><span class="nb">object</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Base class for models that provide a training summary.</span>
<span class="sd"> .. versionadded:: 3.0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;2.1.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">hasSummary</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Indicates whether a training summary exists for this model</span>
<span class="sd"> instance.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">&quot;hasSummary&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;2.1.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">summary</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gets summary of the model trained on the training set. An exception is thrown if</span>
<span class="sd"> no summary exists.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_call_java</span><span class="p">(</span><span class="s2">&quot;summary&quot;</span><span class="p">))</span></div>
<span class="k">class</span> <span class="nc">MetaAlgorithmReadWrite</span><span class="p">:</span>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">isMetaEstimator</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">):</span>
<span class="kn">from</span> <span class="nn">pyspark.ml</span> <span class="kn">import</span> <span class="n">Estimator</span><span class="p">,</span> <span class="n">Pipeline</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.tuning</span> <span class="kn">import</span> <span class="n">_ValidatorParams</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> <span class="kn">import</span> <span class="n">OneVsRest</span>
<span class="k">return</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">Pipeline</span><span class="p">)</span> <span class="ow">or</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">OneVsRest</span><span class="p">)</span> <span class="ow">or</span> \
<span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">Estimator</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">_ValidatorParams</span><span class="p">))</span>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">getAllNestedStages</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">):</span>
<span class="kn">from</span> <span class="nn">pyspark.ml</span> <span class="kn">import</span> <span class="n">Pipeline</span><span class="p">,</span> <span class="n">PipelineModel</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.tuning</span> <span class="kn">import</span> <span class="n">_ValidatorParams</span>
<span class="kn">from</span> <span class="nn">pyspark.ml.classification</span> <span class="kn">import</span> <span class="n">OneVsRest</span><span class="p">,</span> <span class="n">OneVsRestModel</span>
<span class="c1"># TODO: We need to handle `RFormulaModel.pipelineModel` here once PySpark&#39;s RFormulaModel</span>
<span class="c1"># supports the `pipelineModel` property.</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">Pipeline</span><span class="p">):</span>
<span class="n">pySubStages</span> <span class="o">=</span> <span class="n">pyInstance</span><span class="o">.</span><span class="n">getStages</span><span class="p">()</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">PipelineModel</span><span class="p">):</span>
<span class="n">pySubStages</span> <span class="o">=</span> <span class="n">pyInstance</span><span class="o">.</span><span class="n">stages</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">_ValidatorParams</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s1">&#39;PySpark does not support nested validator.&#39;</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">OneVsRest</span><span class="p">):</span>
<span class="n">pySubStages</span> <span class="o">=</span> <span class="p">[</span><span class="n">pyInstance</span><span class="o">.</span><span class="n">getClassifier</span><span class="p">()]</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">pyInstance</span><span class="p">,</span> <span class="n">OneVsRestModel</span><span class="p">):</span>
<span class="n">pySubStages</span> <span class="o">=</span> <span class="p">[</span><span class="n">pyInstance</span><span class="o">.</span><span class="n">getClassifier</span><span class="p">()]</span> <span class="o">+</span> <span class="n">pyInstance</span><span class="o">.</span><span class="n">models</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">pySubStages</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">nestedStages</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">pySubStage</span> <span class="ow">in</span> <span class="n">pySubStages</span><span class="p">:</span>
<span class="n">nestedStages</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">MetaAlgorithmReadWrite</span><span class="o">.</span><span class="n">getAllNestedStages</span><span class="p">(</span><span class="n">pySubStage</span><span class="p">))</span>
<span class="k">return</span> <span class="p">[</span><span class="n">pyInstance</span><span class="p">]</span> <span class="o">+</span> <span class="n">nestedStages</span>
<span class="nd">@staticmethod</span>
<span class="k">def</span> <span class="nf">getUidMap</span><span class="p">(</span><span class="n">instance</span><span class="p">):</span>
<span class="n">nestedStages</span> <span class="o">=</span> <span class="n">MetaAlgorithmReadWrite</span><span class="o">.</span><span class="n">getAllNestedStages</span><span class="p">(</span><span class="n">instance</span><span class="p">)</span>
<span class="n">uidMap</span> <span class="o">=</span> <span class="p">{</span><span class="n">stage</span><span class="o">.</span><span class="n">uid</span><span class="p">:</span> <span class="n">stage</span> <span class="k">for</span> <span class="n">stage</span> <span class="ow">in</span> <span class="n">nestedStages</span><span class="p">}</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">nestedStages</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">len</span><span class="p">(</span><span class="n">uidMap</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">instance</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__module__</span><span class="si">}</span><span class="s1">.</span><span class="si">{</span><span class="n">instance</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s1">&#39;</span>
<span class="sa">f</span><span class="s1">&#39;.load found a compound estimator with stages with duplicate &#39;</span>
<span class="sa">f</span><span class="s1">&#39;UIDs. List of UIDs: </span><span class="si">{</span><span class="nb">list</span><span class="p">(</span><span class="n">uidMap</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span><span class="si">}</span><span class="s1">.&#39;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">uidMap</span>
</pre></div>
</div>
<div class='prev-next-bottom'>
</div>
</main>
</div>
</div>
<script src="../../../_static/js/index.3da636dd464baa7582d2.js"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<p>
&copy; Copyright .<br/>
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br/>
</p>
</div>
</footer>
</body>
</html>