| |
| <!DOCTYPE html> |
| |
| <html lang="en"> |
| <head> |
| <meta charset="utf-8" /> |
| <title>QuantileDiscretizer — PySpark 3.3.3 documentation</title> |
| |
| <link rel="stylesheet" href="../../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css"> |
| |
| |
| <link rel="stylesheet" |
| href="../../_static/vendor/fontawesome/5.13.0/css/all.min.css"> |
| <link rel="preload" as="font" type="font/woff2" crossorigin |
| href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2"> |
| <link rel="preload" as="font" type="font/woff2" crossorigin |
| href="../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2"> |
| |
| |
| |
| <link rel="stylesheet" |
| href="../../_static/vendor/open-sans_all/1.44.1/index.css"> |
| <link rel="stylesheet" |
| href="../../_static/vendor/lato_latin-ext/1.44.1/index.css"> |
| |
| |
| <link rel="stylesheet" href="../../_static/basic.css" type="text/css" /> |
| <link rel="stylesheet" href="../../_static/pygments.css" type="text/css" /> |
| <link rel="stylesheet" type="text/css" href="../../_static/css/pyspark.css" /> |
| |
| <link rel="preload" as="script" href="../../_static/js/index.3da636dd464baa7582d2.js"> |
| |
| <script id="documentation_options" data-url_root="../../" src="../../_static/documentation_options.js"></script> |
| <script src="../../_static/jquery.js"></script> |
| <script src="../../_static/underscore.js"></script> |
| <script src="../../_static/doctools.js"></script> |
| <script src="../../_static/language_data.js"></script> |
| <script src="../../_static/copybutton.js"></script> |
| <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script> |
| <script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script> |
| <script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script> |
| <link rel="search" title="Search" href="../../search.html" /> |
| <link rel="next" title="RobustScaler" href="pyspark.ml.feature.RobustScaler.html" /> |
| <link rel="prev" title="PolynomialExpansion" href="pyspark.ml.feature.PolynomialExpansion.html" /> |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> |
| <meta name="docsearch:language" content="en" /> |
| </head> |
| <body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80"> |
| |
| <nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main"> |
| <div class="container-xl"> |
| |
| <a class="navbar-brand" href="../../index.html"> |
| |
| <img src="../../_static/spark-logo-reverse.png" class="logo" alt="logo" /> |
| |
| </a> |
| <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation"> |
| <span class="navbar-toggler-icon"></span> |
| </button> |
| |
| <div id="navbar-menu" class="col-lg-9 collapse navbar-collapse"> |
| <ul id="navbar-main-elements" class="navbar-nav mr-auto"> |
| |
| |
| <li class="nav-item "> |
| <a class="nav-link" href="../../getting_started/index.html">Getting Started</a> |
| </li> |
| |
| <li class="nav-item "> |
| <a class="nav-link" href="../../user_guide/index.html">User Guide</a> |
| </li> |
| |
| <li class="nav-item active"> |
| <a class="nav-link" href="../index.html">API Reference</a> |
| </li> |
| |
| <li class="nav-item "> |
| <a class="nav-link" href="../../development/index.html">Development</a> |
| </li> |
| |
| <li class="nav-item "> |
| <a class="nav-link" href="../../migration_guide/index.html">Migration Guide</a> |
| </li> |
| |
| |
| </ul> |
| |
| |
| |
| |
| <ul class="navbar-nav"> |
| |
| |
| </ul> |
| </div> |
| </div> |
| </nav> |
| |
| |
| <div class="container-xl"> |
| <div class="row"> |
| |
| <div class="col-12 col-md-3 bd-sidebar"><form class="bd-search d-flex align-items-center" action="../../search.html" method="get"> |
| <i class="icon fas fa-search"></i> |
| <input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" > |
| </form> |
| <nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation"> |
| |
| <div class="bd-toc-item active"> |
| |
| |
| <ul class="nav bd-sidenav"> |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class=""> |
| <a href="../pyspark.sql/index.html">Spark SQL</a> |
| </li> |
| |
| |
| |
| <li class=""> |
| <a href="../pyspark.pandas/index.html">Pandas API on Spark</a> |
| </li> |
| |
| |
| |
| <li class=""> |
| <a href="../pyspark.ss/index.html">Structured Streaming</a> |
| </li> |
| |
| |
| |
| <li class="active"> |
| <a href="../pyspark.ml.html">MLlib (DataFrame-based)</a> |
| </li> |
| |
| |
| |
| <li class=""> |
| <a href="../pyspark.streaming.html">Spark Streaming</a> |
| </li> |
| |
| |
| |
| <li class=""> |
| <a href="../pyspark.mllib.html">MLlib (RDD-based)</a> |
| </li> |
| |
| |
| |
| <li class=""> |
| <a href="../pyspark.html">Spark Core</a> |
| </li> |
| |
| |
| |
| <li class=""> |
| <a href="../pyspark.resource.html">Resource Management</a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| </ul> |
| |
| </nav> |
| </div> |
| |
| |
| |
| <div class="d-none d-xl-block col-xl-2 bd-toc"> |
| |
| |
| <nav id="bd-toc-nav"> |
| <ul class="nav section-nav flex-column"> |
| |
| </ul> |
| </nav> |
| |
| |
| |
| </div> |
| |
| |
| |
| <main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main"> |
| |
| <div> |
| |
| <div class="section" id="quantilediscretizer"> |
| <h1>QuantileDiscretizer<a class="headerlink" href="#quantilediscretizer" title="Permalink to this headline">¶</a></h1> |
| <dl class="py class"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer"> |
| <em class="property">class </em><code class="sig-prename descclassname">pyspark.ml.feature.</code><code class="sig-name descname">QuantileDiscretizer</code><span class="sig-paren">(</span><em class="sig-param"><span class="o">*</span></em>, <em class="sig-param"><span class="n">numBuckets</span><span class="p">:</span> <span class="n">int</span> <span class="o">=</span> <span class="default_value">2</span></em>, <em class="sig-param"><span class="n">inputCol</span><span class="p">:</span> <span class="n">Optional<span class="p">[</span>str<span class="p">]</span></span> <span class="o">=</span> <span class="default_value">None</span></em>, <em class="sig-param"><span class="n">outputCol</span><span class="p">:</span> <span class="n">Optional<span class="p">[</span>str<span class="p">]</span></span> <span class="o">=</span> <span class="default_value">None</span></em>, <em class="sig-param"><span class="n">relativeError</span><span class="p">:</span> <span class="n">float</span> <span class="o">=</span> <span class="default_value">0.001</span></em>, <em class="sig-param"><span class="n">handleInvalid</span><span class="p">:</span> <span class="n">str</span> <span class="o">=</span> <span class="default_value">'error'</span></em>, <em class="sig-param"><span class="n">numBucketsArray</span><span class="p">:</span> <span class="n">Optional<span class="p">[</span>List<span class="p">[</span>int<span class="p">]</span><span class="p">]</span></span> <span class="o">=</span> <span class="default_value">None</span></em>, <em class="sig-param"><span class="n">inputCols</span><span class="p">:</span> <span class="n">Optional<span class="p">[</span>List<span class="p">[</span>str<span class="p">]</span><span class="p">]</span></span> <span class="o">=</span> <span class="default_value">None</span></em>, <em class="sig-param"><span class="n">outputCols</span><span class="p">:</span> <span class="n">Optional<span class="p">[</span>List<span class="p">[</span>str<span 
class="p">]</span><span class="p">]</span></span> <span class="o">=</span> <span class="default_value">None</span></em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/pyspark/ml/feature.html#QuantileDiscretizer"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer" title="Permalink to this definition">¶</a></dt> |
| <dd><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer" title="pyspark.ml.feature.QuantileDiscretizer"><code class="xref py py-class docutils literal notranslate"><span class="pre">QuantileDiscretizer</span></code></a> takes a column with continuous features and outputs a column |
| with binned categorical features. The number of bins can be set using the <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.numBuckets" title="pyspark.ml.feature.QuantileDiscretizer.numBuckets"><code class="xref py py-attr docutils literal notranslate"><span class="pre">numBuckets</span></code></a> |
| parameter. It is possible that the number of buckets used will be less than this value, for |
| example, if there are too few distinct values of the input to create enough distinct quantiles. |
| Since 3.0.0, <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer" title="pyspark.ml.feature.QuantileDiscretizer"><code class="xref py py-class docutils literal notranslate"><span class="pre">QuantileDiscretizer</span></code></a> can map multiple columns at once by setting the |
| <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.inputCols" title="pyspark.ml.feature.QuantileDiscretizer.inputCols"><code class="xref py py-attr docutils literal notranslate"><span class="pre">inputCols</span></code></a> parameter. If both of the <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.inputCol" title="pyspark.ml.feature.QuantileDiscretizer.inputCol"><code class="xref py py-attr docutils literal notranslate"><span class="pre">inputCol</span></code></a> and <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.inputCols" title="pyspark.ml.feature.QuantileDiscretizer.inputCols"><code class="xref py py-attr docutils literal notranslate"><span class="pre">inputCols</span></code></a> |
| parameters are set, an Exception will be thrown. To specify the number of buckets for each |
| column, the <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.numBucketsArray" title="pyspark.ml.feature.QuantileDiscretizer.numBucketsArray"><code class="xref py py-attr docutils literal notranslate"><span class="pre">numBucketsArray</span></code></a> parameter can be set, or if the number of buckets |
| should be the same across columns, <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.numBuckets" title="pyspark.ml.feature.QuantileDiscretizer.numBuckets"><code class="xref py py-attr docutils literal notranslate"><span class="pre">numBuckets</span></code></a> can be set as a convenience.</p> |
| <div class="versionadded"> |
| <p><span class="versionmodified added">New in version 2.0.0.</span></p> |
| </div> |
| <p class="rubric">Notes</p> |
| <p>NaN handling: Note also that |
| <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer" title="pyspark.ml.feature.QuantileDiscretizer"><code class="xref py py-class docutils literal notranslate"><span class="pre">QuantileDiscretizer</span></code></a> will raise an error when it finds NaN values in the dataset, |
| but the user can also choose to either keep or remove NaN values within the dataset by setting the |
| <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.handleInvalid" title="pyspark.ml.feature.QuantileDiscretizer.handleInvalid"><code class="xref py py-attr docutils literal notranslate"><span class="pre">handleInvalid</span></code></a> parameter. If the user chooses to keep NaN values, they will be |
| handled specially and placed into their own bucket; for example, if 4 buckets are used, then |
| non-NaN data will be put into buckets[0-3], but NaNs will be counted in a special bucket[4].</p> |
| <p>Algorithm: The bin ranges are chosen using an approximate algorithm (see the documentation for |
| <a class="reference internal" href="../pyspark.sql/api/pyspark.sql.DataFrameStatFunctions.approxQuantile.html#pyspark.sql.DataFrameStatFunctions.approxQuantile" title="pyspark.sql.DataFrameStatFunctions.approxQuantile"><code class="xref py py-meth docutils literal notranslate"><span class="pre">approxQuantile()</span></code></a> for a detailed description). |
| The precision of the approximation can be controlled with the |
| <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.relativeError" title="pyspark.ml.feature.QuantileDiscretizer.relativeError"><code class="xref py py-attr docutils literal notranslate"><span class="pre">relativeError</span></code></a> parameter. |
| The lower and upper bin bounds will be <cite>-Infinity</cite> and <cite>+Infinity</cite>, covering all real values.</p> |
| <p class="rubric">Examples</p> |
| <div class="doctest highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">>>> </span><span class="n">values</span> <span class="o">=</span> <span class="p">[(</span><span class="mf">0.1</span><span class="p">,),</span> <span class="p">(</span><span class="mf">0.4</span><span class="p">,),</span> <span class="p">(</span><span class="mf">1.2</span><span class="p">,),</span> <span class="p">(</span><span class="mf">1.5</span><span class="p">,),</span> <span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="s2">"nan"</span><span class="p">),),</span> <span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="s2">"nan"</span><span class="p">),)]</span> |
| <span class="gp">>>> </span><span class="n">df1</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">values</span><span class="p">,</span> <span class="p">[</span><span class="s2">"values"</span><span class="p">])</span> |
| <span class="gp">>>> </span><span class="n">qds1</span> <span class="o">=</span> <span class="n">QuantileDiscretizer</span><span class="p">(</span><span class="n">inputCol</span><span class="o">=</span><span class="s2">"values"</span><span class="p">,</span> <span class="n">outputCol</span><span class="o">=</span><span class="s2">"buckets"</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="n">qds1</span><span class="o">.</span><span class="n">setNumBuckets</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span> |
| <span class="go">QuantileDiscretizer...</span> |
| <span class="gp">>>> </span><span class="n">qds1</span><span class="o">.</span><span class="n">setRelativeError</span><span class="p">(</span><span class="mf">0.01</span><span class="p">)</span> |
| <span class="go">QuantileDiscretizer...</span> |
| <span class="gp">>>> </span><span class="n">qds1</span><span class="o">.</span><span class="n">setHandleInvalid</span><span class="p">(</span><span class="s2">"error"</span><span class="p">)</span> |
| <span class="go">QuantileDiscretizer...</span> |
| <span class="gp">>>> </span><span class="n">qds1</span><span class="o">.</span><span class="n">getRelativeError</span><span class="p">()</span> |
| <span class="go">0.01</span> |
| <span class="gp">>>> </span><span class="n">bucketizer</span> <span class="o">=</span> <span class="n">qds1</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">df1</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="n">qds1</span><span class="o">.</span><span class="n">setHandleInvalid</span><span class="p">(</span><span class="s2">"keep"</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">df1</span><span class="p">)</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">df1</span><span class="p">)</span><span class="o">.</span><span class="n">count</span><span class="p">()</span> |
| <span class="go">6</span> |
| <span class="gp">>>> </span><span class="n">qds1</span><span class="o">.</span><span class="n">setHandleInvalid</span><span class="p">(</span><span class="s2">"skip"</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">df1</span><span class="p">)</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">df1</span><span class="p">)</span><span class="o">.</span><span class="n">count</span><span class="p">()</span> |
| <span class="go">4</span> |
| <span class="gp">>>> </span><span class="n">splits</span> <span class="o">=</span> <span class="n">bucketizer</span><span class="o">.</span><span class="n">getSplits</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">splits</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <span class="go">-inf</span> |
| <span class="gp">>>> </span><span class="nb">print</span><span class="p">(</span><span class="s2">"</span><span class="si">%2.1f</span><span class="s2">"</span> <span class="o">%</span> <span class="nb">round</span><span class="p">(</span><span class="n">splits</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="mi">1</span><span class="p">))</span> |
| <span class="go">0.4</span> |
| <span class="gp">>>> </span><span class="n">bucketed</span> <span class="o">=</span> <span class="n">bucketizer</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">df1</span><span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">()</span> |
| <span class="gp">>>> </span><span class="n">bucketed</span><span class="o">.</span><span class="n">buckets</span> |
| <span class="go">0.0</span> |
| <span class="gp">>>> </span><span class="n">quantileDiscretizerPath</span> <span class="o">=</span> <span class="n">temp_path</span> <span class="o">+</span> <span class="s2">"/quantile-discretizer"</span> |
| <span class="gp">>>> </span><span class="n">qds1</span><span class="o">.</span><span class="n">save</span><span class="p">(</span><span class="n">quantileDiscretizerPath</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="n">loadedQds</span> <span class="o">=</span> <span class="n">QuantileDiscretizer</span><span class="o">.</span><span class="n">load</span><span class="p">(</span><span class="n">quantileDiscretizerPath</span><span class="p">)</span> |
| <span class="gp">>>> </span><span class="n">loadedQds</span><span class="o">.</span><span class="n">getNumBuckets</span><span class="p">()</span> <span class="o">==</span> <span class="n">qds1</span><span class="o">.</span><span class="n">getNumBuckets</span><span class="p">()</span> |
| <span class="go">True</span> |
| <span class="gp">>>> </span><span class="n">inputs</span> <span class="o">=</span> <span class="p">[(</span><span class="mf">0.1</span><span class="p">,</span> <span class="mf">0.0</span><span class="p">),</span> <span class="p">(</span><span class="mf">0.4</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">),</span> <span class="p">(</span><span class="mf">1.2</span><span class="p">,</span> <span class="mf">1.3</span><span class="p">),</span> <span class="p">(</span><span class="mf">1.5</span><span class="p">,</span> <span class="mf">1.5</span><span class="p">),</span> |
| <span class="gp">... </span> <span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="s2">"nan"</span><span class="p">),</span> <span class="nb">float</span><span class="p">(</span><span class="s2">"nan"</span><span class="p">)),</span> <span class="p">(</span><span class="nb">float</span><span class="p">(</span><span class="s2">"nan"</span><span class="p">),</span> <span class="nb">float</span><span class="p">(</span><span class="s2">"nan"</span><span class="p">))]</span> |
| <span class="gp">>>> </span><span class="n">df2</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">inputs</span><span class="p">,</span> <span class="p">[</span><span class="s2">"input1"</span><span class="p">,</span> <span class="s2">"input2"</span><span class="p">])</span> |
| <span class="gp">>>> </span><span class="n">qds2</span> <span class="o">=</span> <span class="n">QuantileDiscretizer</span><span class="p">(</span><span class="n">relativeError</span><span class="o">=</span><span class="mf">0.01</span><span class="p">,</span> <span class="n">handleInvalid</span><span class="o">=</span><span class="s2">"error"</span><span class="p">,</span> <span class="n">numBuckets</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">inputCols</span><span class="o">=</span><span class="p">[</span><span class="s2">"input1"</span><span class="p">,</span> <span class="s2">"input2"</span><span class="p">],</span> <span class="n">outputCols</span><span class="o">=</span><span class="p">[</span><span class="s2">"output1"</span><span class="p">,</span> <span class="s2">"output2"</span><span class="p">])</span> |
| <span class="gp">>>> </span><span class="n">qds2</span><span class="o">.</span><span class="n">getRelativeError</span><span class="p">()</span> |
| <span class="go">0.01</span> |
| <span class="gp">>>> </span><span class="n">qds2</span><span class="o">.</span><span class="n">setHandleInvalid</span><span class="p">(</span><span class="s2">"keep"</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">df2</span><span class="p">)</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">df2</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">()</span> |
| <span class="go">+------+------+-------+-------+</span> |
| <span class="go">|input1|input2|output1|output2|</span> |
| <span class="go">+------+------+-------+-------+</span> |
| <span class="go">| 0.1| 0.0| 0.0| 0.0|</span> |
| <span class="go">| 0.4| 1.0| 1.0| 1.0|</span> |
| <span class="go">| 1.2| 1.3| 1.0| 1.0|</span> |
| <span class="go">| 1.5| 1.5| 1.0| 1.0|</span> |
| <span class="go">| NaN| NaN| 2.0| 2.0|</span> |
| <span class="go">| NaN| NaN| 2.0| 2.0|</span> |
| <span class="go">+------+------+-------+-------+</span> |
| <span class="go">...</span> |
| <span class="gp">>>> </span><span class="n">qds3</span> <span class="o">=</span> <span class="n">QuantileDiscretizer</span><span class="p">(</span><span class="n">relativeError</span><span class="o">=</span><span class="mf">0.01</span><span class="p">,</span> <span class="n">handleInvalid</span><span class="o">=</span><span class="s2">"error"</span><span class="p">,</span> |
| <span class="gp">... </span> <span class="n">numBucketsArray</span><span class="o">=</span><span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">10</span><span class="p">],</span> <span class="n">inputCols</span><span class="o">=</span><span class="p">[</span><span class="s2">"input1"</span><span class="p">,</span> <span class="s2">"input2"</span><span class="p">],</span> |
| <span class="gp">... </span> <span class="n">outputCols</span><span class="o">=</span><span class="p">[</span><span class="s2">"output1"</span><span class="p">,</span> <span class="s2">"output2"</span><span class="p">])</span> |
| <span class="gp">>>> </span><span class="n">qds3</span><span class="o">.</span><span class="n">setHandleInvalid</span><span class="p">(</span><span class="s2">"skip"</span><span class="p">)</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">df2</span><span class="p">)</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="n">df2</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">()</span> |
| <span class="go">+------+------+-------+-------+</span> |
| <span class="go">|input1|input2|output1|output2|</span> |
| <span class="go">+------+------+-------+-------+</span> |
| <span class="go">| 0.1| 0.0| 1.0| 1.0|</span> |
| <span class="go">| 0.4| 1.0| 2.0| 2.0|</span> |
| <span class="go">| 1.2| 1.3| 3.0| 3.0|</span> |
| <span class="go">| 1.5| 1.5| 4.0| 4.0|</span> |
| <span class="go">+------+------+-------+-------+</span> |
| <span class="go">...</span> |
| </pre></div> |
| </div> |
| <p class="rubric">Methods</p> |
| <table class="longtable table autosummary"> |
| <colgroup> |
| <col style="width: 10%" /> |
| <col style="width: 90%" /> |
| </colgroup> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.clear" title="pyspark.ml.feature.QuantileDiscretizer.clear"><code class="xref py py-obj docutils literal notranslate"><span class="pre">clear</span></code></a>(param)</p></td> |
| <td><p>Clears a param from the param map if it has been explicitly set.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.copy" title="pyspark.ml.feature.QuantileDiscretizer.copy"><code class="xref py py-obj docutils literal notranslate"><span class="pre">copy</span></code></a>([extra])</p></td> |
| <td><p>Creates a copy of this instance with the same uid and some extra params.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.explainParam" title="pyspark.ml.feature.QuantileDiscretizer.explainParam"><code class="xref py py-obj docutils literal notranslate"><span class="pre">explainParam</span></code></a>(param)</p></td> |
| <td><p>Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.explainParams" title="pyspark.ml.feature.QuantileDiscretizer.explainParams"><code class="xref py py-obj docutils literal notranslate"><span class="pre">explainParams</span></code></a>()</p></td> |
| <td><p>Returns the documentation of all params with their optional default values and user-supplied values.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.extractParamMap" title="pyspark.ml.feature.QuantileDiscretizer.extractParamMap"><code class="xref py py-obj docutils literal notranslate"><span class="pre">extractParamMap</span></code></a>([extra])</p></td> |
| <td><p>Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values &lt; user-supplied values &lt; extra.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.fit" title="pyspark.ml.feature.QuantileDiscretizer.fit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">fit</span></code></a>(dataset[, params])</p></td> |
| <td><p>Fits a model to the input dataset with optional parameters.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.fitMultiple" title="pyspark.ml.feature.QuantileDiscretizer.fitMultiple"><code class="xref py py-obj docutils literal notranslate"><span class="pre">fitMultiple</span></code></a>(dataset, paramMaps)</p></td> |
| <td><p>Fits a model to the input dataset for each param map in <cite>paramMaps</cite>.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.getHandleInvalid" title="pyspark.ml.feature.QuantileDiscretizer.getHandleInvalid"><code class="xref py py-obj docutils literal notranslate"><span class="pre">getHandleInvalid</span></code></a>()</p></td> |
| <td><p>Gets the value of handleInvalid or its default value.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.getInputCol" title="pyspark.ml.feature.QuantileDiscretizer.getInputCol"><code class="xref py py-obj docutils literal notranslate"><span class="pre">getInputCol</span></code></a>()</p></td> |
| <td><p>Gets the value of inputCol or its default value.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.getInputCols" title="pyspark.ml.feature.QuantileDiscretizer.getInputCols"><code class="xref py py-obj docutils literal notranslate"><span class="pre">getInputCols</span></code></a>()</p></td> |
| <td><p>Gets the value of inputCols or its default value.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.getNumBuckets" title="pyspark.ml.feature.QuantileDiscretizer.getNumBuckets"><code class="xref py py-obj docutils literal notranslate"><span class="pre">getNumBuckets</span></code></a>()</p></td> |
| <td><p>Gets the value of numBuckets or its default value.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.getNumBucketsArray" title="pyspark.ml.feature.QuantileDiscretizer.getNumBucketsArray"><code class="xref py py-obj docutils literal notranslate"><span class="pre">getNumBucketsArray</span></code></a>()</p></td> |
| <td><p>Gets the value of numBucketsArray or its default value.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.getOrDefault" title="pyspark.ml.feature.QuantileDiscretizer.getOrDefault"><code class="xref py py-obj docutils literal notranslate"><span class="pre">getOrDefault</span></code></a>(param)</p></td> |
| <td><p>Gets the value of a param in the user-supplied param map or its default value.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.getOutputCol" title="pyspark.ml.feature.QuantileDiscretizer.getOutputCol"><code class="xref py py-obj docutils literal notranslate"><span class="pre">getOutputCol</span></code></a>()</p></td> |
| <td><p>Gets the value of outputCol or its default value.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.getOutputCols" title="pyspark.ml.feature.QuantileDiscretizer.getOutputCols"><code class="xref py py-obj docutils literal notranslate"><span class="pre">getOutputCols</span></code></a>()</p></td> |
| <td><p>Gets the value of outputCols or its default value.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.getParam" title="pyspark.ml.feature.QuantileDiscretizer.getParam"><code class="xref py py-obj docutils literal notranslate"><span class="pre">getParam</span></code></a>(paramName)</p></td> |
| <td><p>Gets a param by its name.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.getRelativeError" title="pyspark.ml.feature.QuantileDiscretizer.getRelativeError"><code class="xref py py-obj docutils literal notranslate"><span class="pre">getRelativeError</span></code></a>()</p></td> |
| <td><p>Gets the value of relativeError or its default value.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.hasDefault" title="pyspark.ml.feature.QuantileDiscretizer.hasDefault"><code class="xref py py-obj docutils literal notranslate"><span class="pre">hasDefault</span></code></a>(param)</p></td> |
| <td><p>Checks whether a param has a default value.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.hasParam" title="pyspark.ml.feature.QuantileDiscretizer.hasParam"><code class="xref py py-obj docutils literal notranslate"><span class="pre">hasParam</span></code></a>(paramName)</p></td> |
| <td><p>Tests whether this instance contains a param with a given (string) name.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.isDefined" title="pyspark.ml.feature.QuantileDiscretizer.isDefined"><code class="xref py py-obj docutils literal notranslate"><span class="pre">isDefined</span></code></a>(param)</p></td> |
| <td><p>Checks whether a param is explicitly set by user or has a default value.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.isSet" title="pyspark.ml.feature.QuantileDiscretizer.isSet"><code class="xref py py-obj docutils literal notranslate"><span class="pre">isSet</span></code></a>(param)</p></td> |
| <td><p>Checks whether a param is explicitly set by user.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.load" title="pyspark.ml.feature.QuantileDiscretizer.load"><code class="xref py py-obj docutils literal notranslate"><span class="pre">load</span></code></a>(path)</p></td> |
| <td><p>Reads an ML instance from the input path, a shortcut of <cite>read().load(path)</cite>.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.read" title="pyspark.ml.feature.QuantileDiscretizer.read"><code class="xref py py-obj docutils literal notranslate"><span class="pre">read</span></code></a>()</p></td> |
| <td><p>Returns an MLReader instance for this class.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.save" title="pyspark.ml.feature.QuantileDiscretizer.save"><code class="xref py py-obj docutils literal notranslate"><span class="pre">save</span></code></a>(path)</p></td> |
| <td><p>Save this ML instance to the given path, a shortcut of ‘write().save(path)’.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.set" title="pyspark.ml.feature.QuantileDiscretizer.set"><code class="xref py py-obj docutils literal notranslate"><span class="pre">set</span></code></a>(param, value)</p></td> |
| <td><p>Sets a parameter in the embedded param map.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.setHandleInvalid" title="pyspark.ml.feature.QuantileDiscretizer.setHandleInvalid"><code class="xref py py-obj docutils literal notranslate"><span class="pre">setHandleInvalid</span></code></a>(value)</p></td> |
| <td><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.handleInvalid" title="pyspark.ml.feature.QuantileDiscretizer.handleInvalid"><code class="xref py py-attr docutils literal notranslate"><span class="pre">handleInvalid</span></code></a>.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.setInputCol" title="pyspark.ml.feature.QuantileDiscretizer.setInputCol"><code class="xref py py-obj docutils literal notranslate"><span class="pre">setInputCol</span></code></a>(value)</p></td> |
| <td><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.inputCol" title="pyspark.ml.feature.QuantileDiscretizer.inputCol"><code class="xref py py-attr docutils literal notranslate"><span class="pre">inputCol</span></code></a>.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.setInputCols" title="pyspark.ml.feature.QuantileDiscretizer.setInputCols"><code class="xref py py-obj docutils literal notranslate"><span class="pre">setInputCols</span></code></a>(value)</p></td> |
| <td><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.inputCols" title="pyspark.ml.feature.QuantileDiscretizer.inputCols"><code class="xref py py-attr docutils literal notranslate"><span class="pre">inputCols</span></code></a>.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.setNumBuckets" title="pyspark.ml.feature.QuantileDiscretizer.setNumBuckets"><code class="xref py py-obj docutils literal notranslate"><span class="pre">setNumBuckets</span></code></a>(value)</p></td> |
| <td><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.numBuckets" title="pyspark.ml.feature.QuantileDiscretizer.numBuckets"><code class="xref py py-attr docutils literal notranslate"><span class="pre">numBuckets</span></code></a>.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.setNumBucketsArray" title="pyspark.ml.feature.QuantileDiscretizer.setNumBucketsArray"><code class="xref py py-obj docutils literal notranslate"><span class="pre">setNumBucketsArray</span></code></a>(value)</p></td> |
| <td><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.numBucketsArray" title="pyspark.ml.feature.QuantileDiscretizer.numBucketsArray"><code class="xref py py-attr docutils literal notranslate"><span class="pre">numBucketsArray</span></code></a>.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.setOutputCol" title="pyspark.ml.feature.QuantileDiscretizer.setOutputCol"><code class="xref py py-obj docutils literal notranslate"><span class="pre">setOutputCol</span></code></a>(value)</p></td> |
| <td><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.outputCol" title="pyspark.ml.feature.QuantileDiscretizer.outputCol"><code class="xref py py-attr docutils literal notranslate"><span class="pre">outputCol</span></code></a>.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.setOutputCols" title="pyspark.ml.feature.QuantileDiscretizer.setOutputCols"><code class="xref py py-obj docutils literal notranslate"><span class="pre">setOutputCols</span></code></a>(value)</p></td> |
| <td><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.outputCols" title="pyspark.ml.feature.QuantileDiscretizer.outputCols"><code class="xref py py-attr docutils literal notranslate"><span class="pre">outputCols</span></code></a>.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.setParams" title="pyspark.ml.feature.QuantileDiscretizer.setParams"><code class="xref py py-obj docutils literal notranslate"><span class="pre">setParams</span></code></a>(self, *[, numBuckets, inputCol, …])</p></td> |
| <td><p>Set the params for the QuantileDiscretizer.</p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.setRelativeError" title="pyspark.ml.feature.QuantileDiscretizer.setRelativeError"><code class="xref py py-obj docutils literal notranslate"><span class="pre">setRelativeError</span></code></a>(value)</p></td> |
| <td><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.relativeError" title="pyspark.ml.feature.QuantileDiscretizer.relativeError"><code class="xref py py-attr docutils literal notranslate"><span class="pre">relativeError</span></code></a>.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.write" title="pyspark.ml.feature.QuantileDiscretizer.write"><code class="xref py py-obj docutils literal notranslate"><span class="pre">write</span></code></a>()</p></td> |
| <td><p>Returns an MLWriter instance for this ML instance.</p></td> |
| </tr> |
| </tbody> |
| </table> |
| <p class="rubric">Attributes</p> |
| <table class="longtable table autosummary"> |
| <colgroup> |
| <col style="width: 10%" /> |
| <col style="width: 90%" /> |
| </colgroup> |
| <tbody> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.handleInvalid" title="pyspark.ml.feature.QuantileDiscretizer.handleInvalid"><code class="xref py py-obj docutils literal notranslate"><span class="pre">handleInvalid</span></code></a></p></td> |
| <td><p></p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.inputCol" title="pyspark.ml.feature.QuantileDiscretizer.inputCol"><code class="xref py py-obj docutils literal notranslate"><span class="pre">inputCol</span></code></a></p></td> |
| <td><p></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.inputCols" title="pyspark.ml.feature.QuantileDiscretizer.inputCols"><code class="xref py py-obj docutils literal notranslate"><span class="pre">inputCols</span></code></a></p></td> |
| <td><p></p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.numBuckets" title="pyspark.ml.feature.QuantileDiscretizer.numBuckets"><code class="xref py py-obj docutils literal notranslate"><span class="pre">numBuckets</span></code></a></p></td> |
| <td><p></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.numBucketsArray" title="pyspark.ml.feature.QuantileDiscretizer.numBucketsArray"><code class="xref py py-obj docutils literal notranslate"><span class="pre">numBucketsArray</span></code></a></p></td> |
| <td><p></p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.outputCol" title="pyspark.ml.feature.QuantileDiscretizer.outputCol"><code class="xref py py-obj docutils literal notranslate"><span class="pre">outputCol</span></code></a></p></td> |
| <td><p></p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.outputCols" title="pyspark.ml.feature.QuantileDiscretizer.outputCols"><code class="xref py py-obj docutils literal notranslate"><span class="pre">outputCols</span></code></a></p></td> |
| <td><p></p></td> |
| </tr> |
| <tr class="row-even"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.params" title="pyspark.ml.feature.QuantileDiscretizer.params"><code class="xref py py-obj docutils literal notranslate"><span class="pre">params</span></code></a></p></td> |
| <td><p>Returns all params ordered by name.</p></td> |
| </tr> |
| <tr class="row-odd"><td><p><a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.relativeError" title="pyspark.ml.feature.QuantileDiscretizer.relativeError"><code class="xref py py-obj docutils literal notranslate"><span class="pre">relativeError</span></code></a></p></td> |
| <td><p></p></td> |
| </tr> |
| </tbody> |
| </table> |
| <p class="rubric">Methods Documentation</p> |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.clear"> |
| <code class="sig-name descname">clear</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">param</span><span class="p">:</span> <span class="n"><a class="reference internal" href="pyspark.ml.param.Param.html#pyspark.ml.param.Param" title="pyspark.ml.param.Param">pyspark.ml.param.Param</a></span></em><span class="sig-paren">)</span> → None<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.clear" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Clears a param from the param map if it has been explicitly set.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.copy"> |
| <code class="sig-name descname">copy</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">extra</span><span class="p">:</span> <span class="n">Optional<span class="p">[</span>ParamMap<span class="p">]</span></span> <span class="o">=</span> <span class="default_value">None</span></em><span class="sig-paren">)</span> → JP<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.copy" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Creates a copy of this instance with the same uid and some |
| extra params. This implementation first calls Params.copy and |
| then makes a copy of the companion Java pipeline component with |
| extra params. So both the Python wrapper and the Java pipeline |
| component get copied.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><dl class="simple"> |
| <dt><strong>extra</strong><span class="classifier">dict, optional</span></dt><dd><p>Extra parameters to copy to the new instance</p> |
| </dd> |
| </dl> |
| </dd> |
| <dt class="field-even">Returns</dt> |
| <dd class="field-even"><dl class="simple"> |
| <dt><code class="xref py py-class docutils literal notranslate"><span class="pre">JavaParams</span></code></dt><dd><p>Copy of this instance</p> |
| </dd> |
| </dl> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.explainParam"> |
| <code class="sig-name descname">explainParam</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">param</span><span class="p">:</span> <span class="n">Union<span class="p">[</span>str<span class="p">, </span><a class="reference internal" href="pyspark.ml.param.Param.html#pyspark.ml.param.Param" title="pyspark.ml.param.Param">pyspark.ml.param.Param</a><span class="p">]</span></span></em><span class="sig-paren">)</span> → str<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.explainParam" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Explains a single param and returns its name, doc, and optional |
| default value and user-supplied value in a string.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.explainParams"> |
| <code class="sig-name descname">explainParams</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → str<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.explainParams" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns the documentation of all params with their optional |
| default values and user-supplied values.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.extractParamMap"> |
| <code class="sig-name descname">extractParamMap</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">extra</span><span class="p">:</span> <span class="n">Optional<span class="p">[</span>ParamMap<span class="p">]</span></span> <span class="o">=</span> <span class="default_value">None</span></em><span class="sig-paren">)</span> → ParamMap<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.extractParamMap" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Extracts the embedded default param values and user-supplied |
| values, and then merges them with extra values from input into |
| a flat param map, where the latter value is used if there exist |
| conflicts, i.e., with ordering: default param values &lt; |
| user-supplied values &lt; extra.</p> |
| <dl class="field-list simple"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><dl class="simple"> |
| <dt><strong>extra</strong><span class="classifier">dict, optional</span></dt><dd><p>extra param values</p> |
| </dd> |
| </dl> |
| </dd> |
| <dt class="field-even">Returns</dt> |
| <dd class="field-even"><dl class="simple"> |
| <dt>dict</dt><dd><p>merged param map</p> |
| </dd> |
| </dl> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.fit"> |
| <code class="sig-name descname">fit</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">dataset</span><span class="p">:</span> <span class="n">pyspark.sql.dataframe.DataFrame</span></em>, <em class="sig-param"><span class="n">params</span><span class="p">:</span> <span class="n">Union[ParamMap, List[ParamMap], Tuple[ParamMap], None]</span> <span class="o">=</span> <span class="default_value">None</span></em><span class="sig-paren">)</span> → Union<span class="p">[</span>M<span class="p">, </span>List<span class="p">[</span>M<span class="p">]</span><span class="p">]</span><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.fit" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Fits a model to the input dataset with optional parameters.</p> |
| <div class="versionadded"> |
| <p><span class="versionmodified added">New in version 1.3.0.</span></p> |
| </div> |
| <dl class="field-list"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><dl> |
| <dt><strong>dataset</strong><span class="classifier"><a class="reference internal" href="../pyspark.sql/api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.DataFrame</span></code></a></span></dt><dd><p>input dataset.</p> |
| </dd> |
| <dt><strong>params</strong><span class="classifier">dict or list or tuple, optional</span></dt><dd><p>an optional param map that overrides embedded params. If a list/tuple of |
| param maps is given, this calls fit on each param map and returns a list of |
| models.</p> |
| </dd> |
| </dl> |
| </dd> |
| <dt class="field-even">Returns</dt> |
| <dd class="field-even"><dl class="simple"> |
| <dt><code class="xref py py-class docutils literal notranslate"><span class="pre">Transformer</span></code> or a list of <code class="xref py py-class docutils literal notranslate"><span class="pre">Transformer</span></code></dt><dd><p>fitted model(s)</p> |
| </dd> |
| </dl> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.fitMultiple"> |
| <code class="sig-name descname">fitMultiple</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">dataset</span><span class="p">:</span> <span class="n">pyspark.sql.dataframe.DataFrame</span></em>, <em class="sig-param"><span class="n">paramMaps</span><span class="p">:</span> <span class="n">Sequence<span class="p">[</span>ParamMap<span class="p">]</span></span></em><span class="sig-paren">)</span> → Iterator<span class="p">[</span>Tuple<span class="p">[</span>int<span class="p">, </span>M<span class="p">]</span><span class="p">]</span><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.fitMultiple" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Fits a model to the input dataset for each param map in <cite>paramMaps</cite>.</p> |
| <div class="versionadded"> |
| <p><span class="versionmodified added">New in version 2.3.0.</span></p> |
| </div> |
| <dl class="field-list"> |
| <dt class="field-odd">Parameters</dt> |
| <dd class="field-odd"><dl> |
| <dt><strong>dataset</strong><span class="classifier"><a class="reference internal" href="../pyspark.sql/api/pyspark.sql.DataFrame.html#pyspark.sql.DataFrame" title="pyspark.sql.DataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.sql.DataFrame</span></code></a></span></dt><dd><p>input dataset.</p> |
| </dd> |
| <dt><strong>paramMaps</strong><span class="classifier"><code class="xref py py-class docutils literal notranslate"><span class="pre">collections.abc.Sequence</span></code></span></dt><dd><p>A Sequence of param maps.</p> |
| </dd> |
| </dl> |
| </dd> |
| <dt class="field-even">Returns</dt> |
| <dd class="field-even"><dl class="simple"> |
| <dt><code class="xref py py-class docutils literal notranslate"><span class="pre">_FitMultipleIterator</span></code></dt><dd><p>A thread-safe iterable which contains one model for each param map. Each |
| call to <cite>next(modelIterator)</cite> will return <cite>(index, model)</cite> where model was fit |
| using <cite>paramMaps[index]</cite>. <cite>index</cite> values may not be sequential.</p> |
| </dd> |
| </dl> |
| </dd> |
| </dl> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.getHandleInvalid"> |
| <code class="sig-name descname">getHandleInvalid</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → str<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.getHandleInvalid" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the value of handleInvalid or its default value.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.getInputCol"> |
| <code class="sig-name descname">getInputCol</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → str<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.getInputCol" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the value of inputCol or its default value.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.getInputCols"> |
| <code class="sig-name descname">getInputCols</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → List<span class="p">[</span>str<span class="p">]</span><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.getInputCols" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the value of inputCols or its default value.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.getNumBuckets"> |
| <code class="sig-name descname">getNumBuckets</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → int<a class="reference internal" href="../../_modules/pyspark/ml/feature.html#QuantileDiscretizer.getNumBuckets"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.getNumBuckets" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the value of numBuckets or its default value.</p> |
| <div class="versionadded"> |
| <p><span class="versionmodified added">New in version 2.0.0.</span></p> |
| </div> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.getNumBucketsArray"> |
| <code class="sig-name descname">getNumBucketsArray</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → List<span class="p">[</span>int<span class="p">]</span><a class="reference internal" href="../../_modules/pyspark/ml/feature.html#QuantileDiscretizer.getNumBucketsArray"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.getNumBucketsArray" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the value of numBucketsArray or its default value.</p> |
| <div class="versionadded"> |
| <p><span class="versionmodified added">New in version 3.0.0.</span></p> |
| </div> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.getOrDefault"> |
| <code class="sig-name descname">getOrDefault</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">param</span><span class="p">:</span> <span class="n">Union<span class="p">[</span>str<span class="p">, </span><a class="reference internal" href="pyspark.ml.param.Param.html#pyspark.ml.param.Param" title="pyspark.ml.param.Param">pyspark.ml.param.Param</a><span class="p">[</span>T<span class="p">]</span><span class="p">]</span></span></em><span class="sig-paren">)</span> → Union<span class="p">[</span>Any<span class="p">, </span>T<span class="p">]</span><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.getOrDefault" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the value of a param in the user-supplied param map or its |
| default value. Raises an error if neither is set.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.getOutputCol"> |
| <code class="sig-name descname">getOutputCol</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → str<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.getOutputCol" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the value of outputCol or its default value.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.getOutputCols"> |
| <code class="sig-name descname">getOutputCols</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → List<span class="p">[</span>str<span class="p">]</span><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.getOutputCols" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the value of outputCols or its default value.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.getParam"> |
| <code class="sig-name descname">getParam</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">paramName</span><span class="p">:</span> <span class="n">str</span></em><span class="sig-paren">)</span> → <a class="reference internal" href="pyspark.ml.param.Param.html#pyspark.ml.param.Param" title="pyspark.ml.param.Param">pyspark.ml.param.Param</a><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.getParam" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets a param by its name.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.getRelativeError"> |
| <code class="sig-name descname">getRelativeError</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → float<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.getRelativeError" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Gets the value of relativeError or its default value.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.hasDefault"> |
| <code class="sig-name descname">hasDefault</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">param</span><span class="p">:</span> <span class="n">Union<span class="p">[</span>str<span class="p">, </span><a class="reference internal" href="pyspark.ml.param.Param.html#pyspark.ml.param.Param" title="pyspark.ml.param.Param">pyspark.ml.param.Param</a><span class="p">[</span>Any<span class="p">]</span><span class="p">]</span></span></em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.hasDefault" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks whether a param has a default value.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.hasParam"> |
| <code class="sig-name descname">hasParam</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">paramName</span><span class="p">:</span> <span class="n">str</span></em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.hasParam" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Tests whether this instance contains a param with a given |
| (string) name.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.isDefined"> |
| <code class="sig-name descname">isDefined</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">param</span><span class="p">:</span> <span class="n">Union<span class="p">[</span>str<span class="p">, </span><a class="reference internal" href="pyspark.ml.param.Param.html#pyspark.ml.param.Param" title="pyspark.ml.param.Param">pyspark.ml.param.Param</a><span class="p">[</span>Any<span class="p">]</span><span class="p">]</span></span></em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.isDefined" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks whether a param is explicitly set by user or has |
| a default value.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.isSet"> |
| <code class="sig-name descname">isSet</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">param</span><span class="p">:</span> <span class="n">Union<span class="p">[</span>str<span class="p">, </span><a class="reference internal" href="pyspark.ml.param.Param.html#pyspark.ml.param.Param" title="pyspark.ml.param.Param">pyspark.ml.param.Param</a><span class="p">[</span>Any<span class="p">]</span><span class="p">]</span></span></em><span class="sig-paren">)</span> → bool<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.isSet" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Checks whether a param is explicitly set by user.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.load"> |
| <em class="property">classmethod </em><code class="sig-name descname">load</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">path</span><span class="p">:</span> <span class="n">str</span></em><span class="sig-paren">)</span> → RL<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.load" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Reads an ML instance from the input path, a shortcut of <cite>read().load(path)</cite>.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.read"> |
| <em class="property">classmethod </em><code class="sig-name descname">read</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → pyspark.ml.util.JavaMLReader<span class="p">[</span>RL<span class="p">]</span><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.read" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns an MLReader instance for this class.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.save"> |
| <code class="sig-name descname">save</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">path</span><span class="p">:</span> <span class="n">str</span></em><span class="sig-paren">)</span> → None<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.save" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Save this ML instance to the given path, a shortcut of ‘write().save(path)’.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.set"> |
| <code class="sig-name descname">set</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">param</span><span class="p">:</span> <span class="n"><a class="reference internal" href="pyspark.ml.param.Param.html#pyspark.ml.param.Param" title="pyspark.ml.param.Param">pyspark.ml.param.Param</a></span></em>, <em class="sig-param"><span class="n">value</span><span class="p">:</span> <span class="n">Any</span></em><span class="sig-paren">)</span> → None<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.set" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets a parameter in the embedded param map.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.setHandleInvalid"> |
| <code class="sig-name descname">setHandleInvalid</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">value</span><span class="p">:</span> <span class="n">str</span></em><span class="sig-paren">)</span> → <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer" title="pyspark.ml.feature.QuantileDiscretizer">pyspark.ml.feature.QuantileDiscretizer</a><a class="reference internal" href="../../_modules/pyspark/ml/feature.html#QuantileDiscretizer.setHandleInvalid"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.setHandleInvalid" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.handleInvalid" title="pyspark.ml.feature.QuantileDiscretizer.handleInvalid"><code class="xref py py-attr docutils literal notranslate"><span class="pre">handleInvalid</span></code></a>.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.setInputCol"> |
| <code class="sig-name descname">setInputCol</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">value</span><span class="p">:</span> <span class="n">str</span></em><span class="sig-paren">)</span> → <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer" title="pyspark.ml.feature.QuantileDiscretizer">pyspark.ml.feature.QuantileDiscretizer</a><a class="reference internal" href="../../_modules/pyspark/ml/feature.html#QuantileDiscretizer.setInputCol"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.setInputCol" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.inputCol" title="pyspark.ml.feature.QuantileDiscretizer.inputCol"><code class="xref py py-attr docutils literal notranslate"><span class="pre">inputCol</span></code></a>.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.setInputCols"> |
| <code class="sig-name descname">setInputCols</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">value</span><span class="p">:</span> <span class="n">List<span class="p">[</span>str<span class="p">]</span></span></em><span class="sig-paren">)</span> → <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer" title="pyspark.ml.feature.QuantileDiscretizer">pyspark.ml.feature.QuantileDiscretizer</a><a class="reference internal" href="../../_modules/pyspark/ml/feature.html#QuantileDiscretizer.setInputCols"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.setInputCols" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.inputCols" title="pyspark.ml.feature.QuantileDiscretizer.inputCols"><code class="xref py py-attr docutils literal notranslate"><span class="pre">inputCols</span></code></a>.</p> |
| <div class="versionadded"> |
| <p><span class="versionmodified added">New in version 3.0.0.</span></p> |
| </div> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.setNumBuckets"> |
| <code class="sig-name descname">setNumBuckets</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">value</span><span class="p">:</span> <span class="n">int</span></em><span class="sig-paren">)</span> → <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer" title="pyspark.ml.feature.QuantileDiscretizer">pyspark.ml.feature.QuantileDiscretizer</a><a class="reference internal" href="../../_modules/pyspark/ml/feature.html#QuantileDiscretizer.setNumBuckets"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.setNumBuckets" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.numBuckets" title="pyspark.ml.feature.QuantileDiscretizer.numBuckets"><code class="xref py py-attr docutils literal notranslate"><span class="pre">numBuckets</span></code></a>.</p> |
| <div class="versionadded"> |
| <p><span class="versionmodified added">New in version 2.0.0.</span></p> |
| </div> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.setNumBucketsArray"> |
| <code class="sig-name descname">setNumBucketsArray</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">value</span><span class="p">:</span> <span class="n">List<span class="p">[</span>int<span class="p">]</span></span></em><span class="sig-paren">)</span> → <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer" title="pyspark.ml.feature.QuantileDiscretizer">pyspark.ml.feature.QuantileDiscretizer</a><a class="reference internal" href="../../_modules/pyspark/ml/feature.html#QuantileDiscretizer.setNumBucketsArray"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.setNumBucketsArray" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.numBucketsArray" title="pyspark.ml.feature.QuantileDiscretizer.numBucketsArray"><code class="xref py py-attr docutils literal notranslate"><span class="pre">numBucketsArray</span></code></a>.</p> |
| <div class="versionadded"> |
| <p><span class="versionmodified added">New in version 3.0.0.</span></p> |
| </div> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.setOutputCol"> |
| <code class="sig-name descname">setOutputCol</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">value</span><span class="p">:</span> <span class="n">str</span></em><span class="sig-paren">)</span> → <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer" title="pyspark.ml.feature.QuantileDiscretizer">pyspark.ml.feature.QuantileDiscretizer</a><a class="reference internal" href="../../_modules/pyspark/ml/feature.html#QuantileDiscretizer.setOutputCol"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.setOutputCol" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.outputCol" title="pyspark.ml.feature.QuantileDiscretizer.outputCol"><code class="xref py py-attr docutils literal notranslate"><span class="pre">outputCol</span></code></a>.</p> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.setOutputCols"> |
| <code class="sig-name descname">setOutputCols</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">value</span><span class="p">:</span> <span class="n">List<span class="p">[</span>str<span class="p">]</span></span></em><span class="sig-paren">)</span> → <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer" title="pyspark.ml.feature.QuantileDiscretizer">pyspark.ml.feature.QuantileDiscretizer</a><a class="reference internal" href="../../_modules/pyspark/ml/feature.html#QuantileDiscretizer.setOutputCols"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.setOutputCols" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.outputCols" title="pyspark.ml.feature.QuantileDiscretizer.outputCols"><code class="xref py py-attr docutils literal notranslate"><span class="pre">outputCols</span></code></a>.</p> |
| <div class="versionadded"> |
| <p><span class="versionmodified added">New in version 3.0.0.</span></p> |
| </div> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.setParams"> |
| <code class="sig-name descname">setParams</code><span class="sig-paren">(</span><em class="sig-param">self</em>, <em class="sig-param">*</em>, <em class="sig-param">numBuckets=2</em>, <em class="sig-param">inputCol=None</em>, <em class="sig-param">outputCol=None</em>, <em class="sig-param">relativeError=0.001</em>, <em class="sig-param">handleInvalid="error"</em>, <em class="sig-param">numBucketsArray=None</em>, <em class="sig-param">inputCols=None</em>, <em class="sig-param">outputCols=None</em><span class="sig-paren">)</span><a class="reference internal" href="../../_modules/pyspark/ml/feature.html#QuantileDiscretizer.setParams"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.setParams" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Set the params for the QuantileDiscretizer.</p> |
| <div class="versionadded"> |
| <p><span class="versionmodified added">New in version 2.0.0.</span></p> |
| </div> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.setRelativeError"> |
| <code class="sig-name descname">setRelativeError</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">value</span><span class="p">:</span> <span class="n">float</span></em><span class="sig-paren">)</span> → <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer" title="pyspark.ml.feature.QuantileDiscretizer">pyspark.ml.feature.QuantileDiscretizer</a><a class="reference internal" href="../../_modules/pyspark/ml/feature.html#QuantileDiscretizer.setRelativeError"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.setRelativeError" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Sets the value of <a class="reference internal" href="#pyspark.ml.feature.QuantileDiscretizer.relativeError" title="pyspark.ml.feature.QuantileDiscretizer.relativeError"><code class="xref py py-attr docutils literal notranslate"><span class="pre">relativeError</span></code></a>.</p> |
| <div class="versionadded"> |
| <p><span class="versionmodified added">New in version 2.0.0.</span></p> |
| </div> |
| </dd></dl> |
| |
| <dl class="py method"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.write"> |
| <code class="sig-name descname">write</code><span class="sig-paren">(</span><span class="sig-paren">)</span> → pyspark.ml.util.JavaMLWriter<a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.write" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns an MLWriter instance for this ML instance.</p> |
| </dd></dl> |
| |
| <p class="rubric">Attributes Documentation</p> |
| <dl class="py attribute"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.handleInvalid"> |
| <code class="sig-name descname">handleInvalid</code><em class="property">: pyspark.ml.param.Param[str]</em><em class="property"> = Param(parent='undefined', name='handleInvalid', doc="how to handle invalid entries. Options are skip (filter out rows with invalid values), error (throw an error), or keep (keep invalid values in a special additional bucket). Note that in the multiple columns case, the invalid handling is applied to all columns. That said for 'error' it will throw an error if any invalids are found in any columns, for 'skip' it will skip rows with any invalids in any columns, etc.")</em><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.handleInvalid" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.inputCol"> |
| <code class="sig-name descname">inputCol</code><em class="property"> = Param(parent='undefined', name='inputCol', doc='input column name.')</em><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.inputCol" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.inputCols"> |
| <code class="sig-name descname">inputCols</code><em class="property"> = Param(parent='undefined', name='inputCols', doc='input column names.')</em><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.inputCols" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.numBuckets"> |
| <code class="sig-name descname">numBuckets</code><em class="property">: pyspark.ml.param.Param[int]</em><em class="property"> = Param(parent='undefined', name='numBuckets', doc='Maximum number of buckets (quantiles, or categories) into which data points are grouped. Must be >= 2.')</em><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.numBuckets" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.numBucketsArray"> |
| <code class="sig-name descname">numBucketsArray</code><em class="property">: pyspark.ml.param.Param[List[int]]</em><em class="property"> = Param(parent='undefined', name='numBucketsArray', doc='Array of number of buckets (quantiles, or categories) into which data points are grouped. This is for multiple columns input. If transforming multiple columns and numBucketsArray is not set, but numBuckets is set, then numBuckets will be applied across all columns.')</em><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.numBucketsArray" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.outputCol"> |
| <code class="sig-name descname">outputCol</code><em class="property"> = Param(parent='undefined', name='outputCol', doc='output column name.')</em><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.outputCol" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.outputCols"> |
| <code class="sig-name descname">outputCols</code><em class="property"> = Param(parent='undefined', name='outputCols', doc='output column names.')</em><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.outputCols" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| <dl class="py attribute"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.params"> |
| <code class="sig-name descname">params</code><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.params" title="Permalink to this definition">¶</a></dt> |
| <dd><p>Returns all params ordered by name. The default implementation |
| uses <code class="xref py py-func docutils literal notranslate"><span class="pre">dir()</span></code> to get all attributes of type |
| <code class="xref py py-class docutils literal notranslate"><span class="pre">Param</span></code>.</p> |
| </dd></dl> |
| |
| <dl class="py attribute"> |
| <dt id="pyspark.ml.feature.QuantileDiscretizer.relativeError"> |
| <code class="sig-name descname">relativeError</code><em class="property"> = Param(parent='undefined', name='relativeError', doc='the relative target precision for the approximate quantile algorithm. Must be in the range [0, 1]')</em><a class="headerlink" href="#pyspark.ml.feature.QuantileDiscretizer.relativeError" title="Permalink to this definition">¶</a></dt> |
| <dd></dd></dl> |
| |
| </dd></dl> |
| |
| </div> |
| |
| |
| </div> |
| |
| |
| <div class='prev-next-bottom'> |
| |
| <a class='left-prev' id="prev-link" href="pyspark.ml.feature.PolynomialExpansion.html" title="previous page">PolynomialExpansion</a> |
| <a class='right-next' id="next-link" href="pyspark.ml.feature.RobustScaler.html" title="next page">RobustScaler</a> |
| |
| </div> |
| |
| </main> |
| |
| |
| </div> |
| </div> |
| |
| |
| <script src="../../_static/js/index.3da636dd464baa7582d2.js"></script> |
| |
| |
| <footer class="footer mt-5 mt-md-0"> |
| <div class="container"> |
| <p> |
| © Copyright .<br/> |
| Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br/> |
| </p> |
| </div> |
| </footer> |
| </body> |
| </html> |