blob: 4aac255084a41416dfdfe7a9b86a26cb2c59ba5d [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<!-- Document metadata for the rendered source listing of pyspark.mllib.evaluation.
     The lang attribute on the root element lets assistive technology and
     search engines identify the page language. -->
<title>pyspark.mllib.evaluation &#8212; PySpark 3.5.3 documentation</title>
<!-- Theme stylesheets; the digest query string is a cache-buster. -->
<link href="../../../_static/styles/theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=1999514e3f237ded88cf" rel="stylesheet">
<!-- Font Awesome stylesheet plus preloaded webfonts (crossorigin is required on font preloads). -->
<link rel="stylesheet"
href="../../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<!-- NOTE(review): the next link loads pydata-sphinx-theme.css a second time, without the digest;
     confirm whether this duplicate is intentional before removing it. -->
<link rel="stylesheet" href="../../../_static/styles/pydata-sphinx-theme.css" type="text/css" />
<link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf">
<!-- Scripts are loaded synchronously and in order; the inline version-switcher script in the navbar uses jQuery ($). -->
<script id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
<script src="../../../_static/jquery.js"></script>
<script src="../../../_static/underscore.js"></script>
<script src="../../../_static/doctools.js"></script>
<script src="../../../_static/language_data.js"></script>
<script src="../../../_static/clipboard.min.js"></script>
<script src="../../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<!-- MathJax: the configuration block is placed before the loader so that it is
     already in the document when MathJax starts up. -->
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<script async="async" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/_modules/pyspark/mllib/evaluation.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en">
<!-- Google Analytics -->
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<div class="container-fluid" id="banner"></div>
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main"><div class="container-xl">
<!-- Brand link back to the documentation home page. The logo is the only
     content of the link, so its alt text names the link destination. -->
<div id="navbar-start">
  <a class="navbar-brand" href="../../../index.html">
    <img src="../../../_static/spark-logo-reverse.png" class="logo" alt="PySpark documentation home">
  </a>
</div>
<!-- Toggle button that collapses/expands #navbar-collapsible. -->
<button
  class="navbar-toggler"
  data-toggle="collapse"
  data-target="#navbar-collapsible"
  type="button"
  aria-controls="navbar-collapsible"
  aria-expanded="false"
  aria-label="Toggle navigation">
  <span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-collapsible" class="col-lg-9 collapse navbar-collapse">
<!-- Top-level documentation sections shown in the centre of the navbar. -->
<div id="navbar-center" class="mr-auto">
  <div class="navbar-center-item">
    <ul id="navbar-main-elements" class="navbar-nav">
      <li class="toctree-l1 nav-item">
        <a class="reference internal nav-link" href="../../../index.html">
          Overview
        </a>
      </li>
      <li class="toctree-l1 nav-item">
        <a class="reference internal nav-link" href="../../../getting_started/index.html">
          Getting Started
        </a>
      </li>
      <li class="toctree-l1 nav-item">
        <a class="reference internal nav-link" href="../../../user_guide/index.html">
          User Guides
        </a>
      </li>
      <li class="toctree-l1 nav-item">
        <a class="reference internal nav-link" href="../../../reference/index.html">
          API Reference
        </a>
      </li>
      <li class="toctree-l1 nav-item">
        <a class="reference internal nav-link" href="../../../development/index.html">
          Development
        </a>
      </li>
      <li class="toctree-l1 nav-item">
        <a class="reference internal nav-link" href="../../../migration_guide/index.html">
          Migration Guides
        </a>
      </li>
    </ul>
  </div>
</div>
<div id="navbar-end">
<div class="navbar-end-item">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Version switcher: the button shows the version of this page; the menu
     (#version_switcher) starts empty and is filled in by the inline script
     that follows. aria-haspopup / aria-expanded expose the dropdown state
     to assistive technology. -->
<div id="version-button" class="dropdown">
  <button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
    3.5.3
    <span class="caret"></span>
  </button>
  <div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button">
    <!-- dropdown will be populated by javascript on page load -->
  </div>
</div>
<script type="text/javascript">
// Build the docs homepage URL for one version entry.
//   entry: object whose `version` property is substituted into the URL pattern.
//   Returns the pattern with its "{version}" placeholder filled in.
function buildURL(entry) {
  // URL pattern supplied by jinja
  const urlPattern = "https://spark.apache.org/docs/{version}/api/python/index.html";
  return urlPattern.replace("{version}", entry.version);
}
// Click handler for a version-switcher link.
// Checks whether the page currently being viewed also exists in the other
// docs version and, if so, goes there instead of that version's homepage.
//   event: click event whose target carries the other version's homepage
//          URL in its "href" attribute.
//   Returns false so the link's default navigation is suppressed; the
//   redirect happens from the AJAX callbacks instead.
function checkPageExistsAndRedirect(event) {
  const currentFilePath = "_modules/pyspark/mllib/evaluation.html",
        otherDocsHomepage = event.target.getAttribute("href");
  // The homepage URL ends in "index.html". Appending the page path to it
  // directly would yield ".../index.html_modules/..." which never exists, so
  // strip the file name first and append the path to the docs root.
  const otherDocsRoot = `${otherDocsHomepage}`.replace(/index\.html$/, "");
  const tryUrl = `${otherDocsRoot}${currentFilePath}`;
  $.ajax({
    type: 'HEAD',
    url: tryUrl,
    // if the page exists, go there
    success: function() {
      location.href = tryUrl;
    }
  }).fail(function() {
    // otherwise fall back to the homepage of the other docs version
    location.href = otherDocsHomepage;
  });
  return false;
}
// Populate the version switcher: fetch the list of published versions and
// append one link per entry to the #version_switcher dropdown.
(function () {
  // get JSON config
  $.getJSON("https://spark.apache.org/static/versions.json", function(data, textStatus, jqXHR) {
    const dropdown = $("#version_switcher");
    // Links are created synchronously, in the order of the JSON data; each
    // one initially points at the homepage of its docs version.
    $.each(data, function(index, entry) {
      // entries without a custom name (e.g., "latest") are labelled with
      // their version string
      if (!("name" in entry)) {
        entry.name = entry.version;
      }
      entry.url = buildURL(entry);

      const link = document.createElement("a");
      link.className = "list-group-item list-group-item-action py-1";
      link.setAttribute("href", `${entry.url}`);
      link.textContent = `${entry.name}`;
      // on click, try the matching page in the other version first
      link.onclick = checkPageExistsAndRedirect;
      dropdown.append(link);
    });
  });
})();
</script>
</div>
</div>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<!-- Only show if we have sidebars configured, else just a small margin -->
<!-- Left sidebar: search box followed by the (empty on this page) docs navigation. -->
<div class="col-12 col-md-3 bd-sidebar">
  <div class="sidebar-start-items">
    <!-- role="search" exposes the form as a search landmark. -->
    <form class="bd-search d-flex align-items-center" action="../../../search.html" method="get" role="search">
      <!-- Decorative icon: hidden from assistive technology; the input carries the accessible name. -->
      <i class="icon fas fa-search" aria-hidden="true"></i>
      <input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off">
    </form>
    <nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
      <div class="bd-toc-item active">
      </div>
    </nav>
  </div>
  <div class="sidebar-end-items">
  </div>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<h1>Source code for pyspark.mllib.evaluation</h1><div class="highlight"><pre>
<span></span><span class="c1">#</span>
<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
<span class="c1"># this work for additional information regarding copyright ownership.</span>
<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
<span class="c1"># (the &quot;License&quot;); you may not use this file except in compliance with</span>
<span class="c1"># the License. You may obtain a copy of the License at</span>
<span class="c1">#</span>
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="c1">#</span>
<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
<span class="c1"># distributed under the License is distributed on an &quot;AS IS&quot; BASIS,</span>
<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
<span class="c1"># See the License for the specific language governing permissions and</span>
<span class="c1"># limitations under the License.</span>
<span class="c1">#</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Generic</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">,</span> <span class="n">TypeVar</span><span class="p">,</span> <span class="n">Union</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">since</span>
<span class="kn">from</span> <span class="nn">pyspark.rdd</span> <span class="kn">import</span> <span class="n">RDD</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib.common</span> <span class="kn">import</span> <span class="n">JavaModelWrapper</span><span class="p">,</span> <span class="n">callMLlibFunc</span>
<span class="kn">from</span> <span class="nn">pyspark.mllib.linalg</span> <span class="kn">import</span> <span class="n">Matrix</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SQLContext</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="kn">import</span> <span class="n">ArrayType</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">,</span> <span class="n">StructField</span><span class="p">,</span> <span class="n">StructType</span>
<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span>
<span class="s2">&quot;BinaryClassificationMetrics&quot;</span><span class="p">,</span>
<span class="s2">&quot;RegressionMetrics&quot;</span><span class="p">,</span>
<span class="s2">&quot;MulticlassMetrics&quot;</span><span class="p">,</span>
<span class="s2">&quot;RankingMetrics&quot;</span><span class="p">,</span>
<span class="p">]</span>
<span class="n">T</span> <span class="o">=</span> <span class="n">TypeVar</span><span class="p">(</span><span class="s2">&quot;T&quot;</span><span class="p">)</span>
<div class="viewcode-block" id="BinaryClassificationMetrics"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.BinaryClassificationMetrics.html#pyspark.mllib.evaluation.BinaryClassificationMetrics">[docs]</a><span class="k">class</span> <span class="nc">BinaryClassificationMetrics</span><span class="p">(</span><span class="n">JavaModelWrapper</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Evaluator for binary classification.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> scoreAndLabels : :py:class:`pyspark.RDD`</span>
<span class="sd"> an RDD of score, label and optional weight.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; scoreAndLabels = sc.parallelize([</span>
<span class="sd"> ... (0.1, 0.0), (0.1, 1.0), (0.4, 0.0), (0.6, 0.0), (0.6, 1.0), (0.6, 1.0), (0.8, 1.0)], 2)</span>
<span class="sd"> &gt;&gt;&gt; metrics = BinaryClassificationMetrics(scoreAndLabels)</span>
<span class="sd"> &gt;&gt;&gt; metrics.areaUnderROC</span>
<span class="sd"> 0.70...</span>
<span class="sd"> &gt;&gt;&gt; metrics.areaUnderPR</span>
<span class="sd"> 0.83...</span>
<span class="sd"> &gt;&gt;&gt; metrics.unpersist()</span>
<span class="sd"> &gt;&gt;&gt; scoreAndLabelsWithOptWeight = sc.parallelize([</span>
<span class="sd"> ... (0.1, 0.0, 1.0), (0.1, 1.0, 0.4), (0.4, 0.0, 0.2), (0.6, 0.0, 0.6), (0.6, 1.0, 0.9),</span>
<span class="sd"> ... (0.6, 1.0, 0.5), (0.8, 1.0, 0.7)], 2)</span>
<span class="sd"> &gt;&gt;&gt; metrics = BinaryClassificationMetrics(scoreAndLabelsWithOptWeight)</span>
<span class="sd"> &gt;&gt;&gt; metrics.areaUnderROC</span>
<span class="sd"> 0.79...</span>
<span class="sd"> &gt;&gt;&gt; metrics.areaUnderPR</span>
<span class="sd"> 0.88...</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">scoreAndLabels</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]):</span>
<span class="n">sc</span> <span class="o">=</span> <span class="n">scoreAndLabels</span><span class="o">.</span><span class="n">ctx</span>
<span class="n">sql_ctx</span> <span class="o">=</span> <span class="n">SQLContext</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">(</span><span class="n">sc</span><span class="p">)</span>
<span class="n">numCol</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">scoreAndLabels</span><span class="o">.</span><span class="n">first</span><span class="p">())</span>
<span class="n">schema</span> <span class="o">=</span> <span class="n">StructType</span><span class="p">(</span>
<span class="p">[</span>
<span class="n">StructField</span><span class="p">(</span><span class="s2">&quot;score&quot;</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
<span class="n">StructField</span><span class="p">(</span><span class="s2">&quot;label&quot;</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
<span class="p">]</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">numCol</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span>
<span class="n">schema</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="s2">&quot;weight&quot;</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="kc">False</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">sql_ctx</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">scoreAndLabels</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">)</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_class</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">BinaryClassificationMetrics</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">java_class</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">_jdf</span><span class="p">)</span>
<span class="nb">super</span><span class="p">(</span><span class="n">BinaryClassificationMetrics</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">areaUnderROC</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Computes the area under the receiver operating characteristic</span>
<span class="sd"> (ROC) curve.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;areaUnderROC&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">areaUnderPR</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Computes the area under the precision-recall curve.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;areaUnderPR&quot;</span><span class="p">)</span>
<div class="viewcode-block" id="BinaryClassificationMetrics.unpersist"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.BinaryClassificationMetrics.html#pyspark.mllib.evaluation.BinaryClassificationMetrics.unpersist">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">unpersist</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Unpersists intermediate RDDs used in the computation.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;unpersist&quot;</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="RegressionMetrics"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.RegressionMetrics.html#pyspark.mllib.evaluation.RegressionMetrics">[docs]</a><span class="k">class</span> <span class="nc">RegressionMetrics</span><span class="p">(</span><span class="n">JavaModelWrapper</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Evaluator for regression.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> predictionAndObservations : :py:class:`pyspark.RDD`</span>
<span class="sd"> an RDD of prediction, observation and optional weight.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; predictionAndObservations = sc.parallelize([</span>
<span class="sd"> ... (2.5, 3.0), (0.0, -0.5), (2.0, 2.0), (8.0, 7.0)])</span>
<span class="sd"> &gt;&gt;&gt; metrics = RegressionMetrics(predictionAndObservations)</span>
<span class="sd"> &gt;&gt;&gt; metrics.explainedVariance</span>
<span class="sd"> 8.859...</span>
<span class="sd"> &gt;&gt;&gt; metrics.meanAbsoluteError</span>
<span class="sd"> 0.5...</span>
<span class="sd"> &gt;&gt;&gt; metrics.meanSquaredError</span>
<span class="sd"> 0.37...</span>
<span class="sd"> &gt;&gt;&gt; metrics.rootMeanSquaredError</span>
<span class="sd"> 0.61...</span>
<span class="sd"> &gt;&gt;&gt; metrics.r2</span>
<span class="sd"> 0.94...</span>
<span class="sd"> &gt;&gt;&gt; predictionAndObservationsWithOptWeight = sc.parallelize([</span>
<span class="sd"> ... (2.5, 3.0, 0.5), (0.0, -0.5, 1.0), (2.0, 2.0, 0.3), (8.0, 7.0, 0.9)])</span>
<span class="sd"> &gt;&gt;&gt; metrics = RegressionMetrics(predictionAndObservationsWithOptWeight)</span>
<span class="sd"> &gt;&gt;&gt; metrics.rootMeanSquaredError</span>
<span class="sd"> 0.68...</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">predictionAndObservations</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]):</span>
<span class="n">sc</span> <span class="o">=</span> <span class="n">predictionAndObservations</span><span class="o">.</span><span class="n">ctx</span>
<span class="n">sql_ctx</span> <span class="o">=</span> <span class="n">SQLContext</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">(</span><span class="n">sc</span><span class="p">)</span>
<span class="n">numCol</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">predictionAndObservations</span><span class="o">.</span><span class="n">first</span><span class="p">())</span>
<span class="n">schema</span> <span class="o">=</span> <span class="n">StructType</span><span class="p">(</span>
<span class="p">[</span>
<span class="n">StructField</span><span class="p">(</span><span class="s2">&quot;prediction&quot;</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
<span class="n">StructField</span><span class="p">(</span><span class="s2">&quot;observation&quot;</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
<span class="p">]</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">numCol</span> <span class="o">==</span> <span class="mi">3</span><span class="p">:</span>
<span class="n">schema</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="s2">&quot;weight&quot;</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="kc">False</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">sql_ctx</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">predictionAndObservations</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">)</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_class</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">RegressionMetrics</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">java_class</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">_jdf</span><span class="p">)</span>
<span class="nb">super</span><span class="p">(</span><span class="n">RegressionMetrics</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">explainedVariance</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sa">r</span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the explained variance regression score.</span>
<span class="sd"> explainedVariance = :math:`1 - \frac{variance(y - \hat{y})}{variance(y)}`</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;explainedVariance&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">meanAbsoluteError</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the mean absolute error, which is a risk function corresponding to the</span>
<span class="sd"> expected value of the absolute error loss or l1-norm loss.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;meanAbsoluteError&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">meanSquaredError</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the mean squared error, which is a risk function corresponding to the</span>
<span class="sd"> expected value of the squared error loss or quadratic loss.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;meanSquaredError&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">rootMeanSquaredError</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the root mean squared error, which is defined as the square root of</span>
<span class="sd"> the mean squared error.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;rootMeanSquaredError&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">r2</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns R^2^, the coefficient of determination.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;r2&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="MulticlassMetrics"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.MulticlassMetrics.html#pyspark.mllib.evaluation.MulticlassMetrics">[docs]</a><span class="k">class</span> <span class="nc">MulticlassMetrics</span><span class="p">(</span><span class="n">JavaModelWrapper</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Evaluator for multiclass classification.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> predictionAndLabels : :py:class:`pyspark.RDD`</span>
<span class="sd"> an RDD of prediction, label, optional weight and optional probability.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; predictionAndLabels = sc.parallelize([(0.0, 0.0), (0.0, 1.0), (0.0, 0.0),</span>
<span class="sd"> ... (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)])</span>
<span class="sd"> &gt;&gt;&gt; metrics = MulticlassMetrics(predictionAndLabels)</span>
<span class="sd"> &gt;&gt;&gt; metrics.confusionMatrix().toArray()</span>
<span class="sd"> array([[ 2., 1., 1.],</span>
<span class="sd"> [ 1., 3., 0.],</span>
<span class="sd"> [ 0., 0., 1.]])</span>
<span class="sd"> &gt;&gt;&gt; metrics.falsePositiveRate(0.0)</span>
<span class="sd"> 0.2...</span>
<span class="sd"> &gt;&gt;&gt; metrics.precision(1.0)</span>
<span class="sd"> 0.75...</span>
<span class="sd"> &gt;&gt;&gt; metrics.recall(2.0)</span>
<span class="sd"> 1.0...</span>
<span class="sd"> &gt;&gt;&gt; metrics.fMeasure(0.0, 2.0)</span>
<span class="sd"> 0.52...</span>
<span class="sd"> &gt;&gt;&gt; metrics.accuracy</span>
<span class="sd"> 0.66...</span>
<span class="sd"> &gt;&gt;&gt; metrics.weightedFalsePositiveRate</span>
<span class="sd"> 0.19...</span>
<span class="sd"> &gt;&gt;&gt; metrics.weightedPrecision</span>
<span class="sd"> 0.68...</span>
<span class="sd"> &gt;&gt;&gt; metrics.weightedRecall</span>
<span class="sd"> 0.66...</span>
<span class="sd"> &gt;&gt;&gt; metrics.weightedFMeasure()</span>
<span class="sd"> 0.66...</span>
<span class="sd"> &gt;&gt;&gt; metrics.weightedFMeasure(2.0)</span>
<span class="sd"> 0.65...</span>
<span class="sd"> &gt;&gt;&gt; predAndLabelsWithOptWeight = sc.parallelize([(0.0, 0.0, 1.0), (0.0, 1.0, 1.0),</span>
<span class="sd"> ... (0.0, 0.0, 1.0), (1.0, 0.0, 1.0), (1.0, 1.0, 1.0), (1.0, 1.0, 1.0), (1.0, 1.0, 1.0),</span>
<span class="sd"> ... (2.0, 2.0, 1.0), (2.0, 0.0, 1.0)])</span>
<span class="sd"> &gt;&gt;&gt; metrics = MulticlassMetrics(predAndLabelsWithOptWeight)</span>
<span class="sd"> &gt;&gt;&gt; metrics.confusionMatrix().toArray()</span>
<span class="sd"> array([[ 2., 1., 1.],</span>
<span class="sd"> [ 1., 3., 0.],</span>
<span class="sd"> [ 0., 0., 1.]])</span>
<span class="sd"> &gt;&gt;&gt; metrics.falsePositiveRate(0.0)</span>
<span class="sd"> 0.2...</span>
<span class="sd"> &gt;&gt;&gt; metrics.precision(1.0)</span>
<span class="sd"> 0.75...</span>
<span class="sd"> &gt;&gt;&gt; metrics.recall(2.0)</span>
<span class="sd"> 1.0...</span>
<span class="sd"> &gt;&gt;&gt; metrics.fMeasure(0.0, 2.0)</span>
<span class="sd"> 0.52...</span>
<span class="sd"> &gt;&gt;&gt; metrics.accuracy</span>
<span class="sd"> 0.66...</span>
<span class="sd"> &gt;&gt;&gt; metrics.weightedFalsePositiveRate</span>
<span class="sd"> 0.19...</span>
<span class="sd"> &gt;&gt;&gt; metrics.weightedPrecision</span>
<span class="sd"> 0.68...</span>
<span class="sd"> &gt;&gt;&gt; metrics.weightedRecall</span>
<span class="sd"> 0.66...</span>
<span class="sd"> &gt;&gt;&gt; metrics.weightedFMeasure()</span>
<span class="sd"> 0.66...</span>
<span class="sd"> &gt;&gt;&gt; metrics.weightedFMeasure(2.0)</span>
<span class="sd"> 0.65...</span>
<span class="sd"> &gt;&gt;&gt; predictionAndLabelsWithProbabilities = sc.parallelize([</span>
<span class="sd"> ... (1.0, 1.0, 1.0, [0.1, 0.8, 0.1]), (0.0, 2.0, 1.0, [0.9, 0.05, 0.05]),</span>
<span class="sd"> ... (0.0, 0.0, 1.0, [0.8, 0.2, 0.0]), (1.0, 1.0, 1.0, [0.3, 0.65, 0.05])])</span>
<span class="sd"> &gt;&gt;&gt; metrics = MulticlassMetrics(predictionAndLabelsWithProbabilities)</span>
<span class="sd"> &gt;&gt;&gt; metrics.logLoss()</span>
<span class="sd"> 0.9682...</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">predictionAndLabels</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">float</span><span class="p">]]):</span>
<span class="n">sc</span> <span class="o">=</span> <span class="n">predictionAndLabels</span><span class="o">.</span><span class="n">ctx</span>
<span class="n">sql_ctx</span> <span class="o">=</span> <span class="n">SQLContext</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">(</span><span class="n">sc</span><span class="p">)</span>
<span class="n">numCol</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">predictionAndLabels</span><span class="o">.</span><span class="n">first</span><span class="p">())</span>
<span class="n">schema</span> <span class="o">=</span> <span class="n">StructType</span><span class="p">(</span>
<span class="p">[</span>
<span class="n">StructField</span><span class="p">(</span><span class="s2">&quot;prediction&quot;</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
<span class="n">StructField</span><span class="p">(</span><span class="s2">&quot;label&quot;</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span>
<span class="p">]</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">numCol</span> <span class="o">&gt;=</span> <span class="mi">3</span><span class="p">:</span>
<span class="n">schema</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="s2">&quot;weight&quot;</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="kc">False</span><span class="p">)</span>
<span class="k">if</span> <span class="n">numCol</span> <span class="o">==</span> <span class="mi">4</span><span class="p">:</span>
<span class="n">schema</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="s2">&quot;probability&quot;</span><span class="p">,</span> <span class="n">ArrayType</span><span class="p">(</span><span class="n">DoubleType</span><span class="p">(),</span> <span class="kc">False</span><span class="p">),</span> <span class="kc">False</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">sql_ctx</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span><span class="n">predictionAndLabels</span><span class="p">,</span> <span class="n">schema</span><span class="p">)</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_class</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">MulticlassMetrics</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">java_class</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">_jdf</span><span class="p">)</span>
<span class="nb">super</span><span class="p">(</span><span class="n">MulticlassMetrics</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span>
<div class="viewcode-block" id="MulticlassMetrics.confusionMatrix"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.MulticlassMetrics.html#pyspark.mllib.evaluation.MulticlassMetrics.confusionMatrix">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">confusionMatrix</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Matrix</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the confusion matrix: predicted classes are in columns,</span>
<span class="sd"> ordered by class label ascending, as in &quot;labels&quot;.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;confusionMatrix&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="MulticlassMetrics.truePositiveRate"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.MulticlassMetrics.html#pyspark.mllib.evaluation.MulticlassMetrics.truePositiveRate">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">truePositiveRate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">label</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns true positive rate for a given label (category).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;truePositiveRate&quot;</span><span class="p">,</span> <span class="n">label</span><span class="p">)</span></div>
<div class="viewcode-block" id="MulticlassMetrics.falsePositiveRate"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.MulticlassMetrics.html#pyspark.mllib.evaluation.MulticlassMetrics.falsePositiveRate">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">falsePositiveRate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">label</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns false positive rate for a given label (category).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;falsePositiveRate&quot;</span><span class="p">,</span> <span class="n">label</span><span class="p">)</span></div>
<div class="viewcode-block" id="MulticlassMetrics.precision"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.MulticlassMetrics.html#pyspark.mllib.evaluation.MulticlassMetrics.precision">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">precision</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">label</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns precision for a given label (category).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;precision&quot;</span><span class="p">,</span> <span class="nb">float</span><span class="p">(</span><span class="n">label</span><span class="p">))</span></div>
<div class="viewcode-block" id="MulticlassMetrics.recall"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.MulticlassMetrics.html#pyspark.mllib.evaluation.MulticlassMetrics.recall">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">recall</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">label</span><span class="p">:</span> <span class="nb">float</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns recall for a given label (category).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;recall&quot;</span><span class="p">,</span> <span class="nb">float</span><span class="p">(</span><span class="n">label</span><span class="p">))</span></div>
<div class="viewcode-block" id="MulticlassMetrics.fMeasure"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.MulticlassMetrics.html#pyspark.mllib.evaluation.MulticlassMetrics.fMeasure">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">fMeasure</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">label</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">beta</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns f-measure for a given label (category), using beta if it is provided.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">beta</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;fMeasure&quot;</span><span class="p">,</span> <span class="n">label</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;fMeasure&quot;</span><span class="p">,</span> <span class="n">label</span><span class="p">,</span> <span class="n">beta</span><span class="p">)</span></div>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;2.0.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">accuracy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns accuracy (equal to the total number of correctly classified instances</span>
<span class="sd"> out of the total number of instances).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;accuracy&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">weightedTruePositiveRate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns weighted true positive rate</span>
<span class="sd"> (equal to the micro-averaged precision, recall and f-measure).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;weightedTruePositiveRate&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">weightedFalsePositiveRate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns weighted false positive rate.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;weightedFalsePositiveRate&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">weightedRecall</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns weighted averaged recall</span>
<span class="sd"> (equal to the micro-averaged precision, recall and f-measure).</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;weightedRecall&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">weightedPrecision</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns weighted averaged precision.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;weightedPrecision&quot;</span><span class="p">)</span>
<div class="viewcode-block" id="MulticlassMetrics.weightedFMeasure"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.MulticlassMetrics.html#pyspark.mllib.evaluation.MulticlassMetrics.weightedFMeasure">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">weightedFMeasure</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">beta</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns weighted averaged f-measure.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">beta</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;weightedFMeasure&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;weightedFMeasure&quot;</span><span class="p">,</span> <span class="n">beta</span><span class="p">)</span></div>
<div class="viewcode-block" id="MulticlassMetrics.logLoss"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.MulticlassMetrics.html#pyspark.mllib.evaluation.MulticlassMetrics.logLoss">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;3.0.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">logLoss</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">eps</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">1e-15</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns weighted logLoss.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;logLoss&quot;</span><span class="p">,</span> <span class="n">eps</span><span class="p">)</span></div></div>
<div class="viewcode-block" id="RankingMetrics"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.RankingMetrics.html#pyspark.mllib.evaluation.RankingMetrics">[docs]</a><span class="k">class</span> <span class="nc">RankingMetrics</span><span class="p">(</span><span class="n">JavaModelWrapper</span><span class="p">,</span> <span class="n">Generic</span><span class="p">[</span><span class="n">T</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Evaluator for ranking algorithms.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> predictionAndLabels : :py:class:`pyspark.RDD`</span>
<span class="sd"> an RDD of (predicted ranking, ground truth set) pairs</span>
<span class="sd"> or (predicted ranking, ground truth set,</span>
<span class="sd"> relevance value of ground truth set).</span>
<span class="sd"> Since 3.4.0, it supports ndcg evaluation with relevance value.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; predictionAndLabels = sc.parallelize([</span>
<span class="sd"> ... ([1, 6, 2, 7, 8, 3, 9, 10, 4, 5], [1, 2, 3, 4, 5]),</span>
<span class="sd"> ... ([4, 1, 5, 6, 2, 7, 3, 8, 9, 10], [1, 2, 3]),</span>
<span class="sd"> ... ([1, 2, 3, 4, 5], [])])</span>
<span class="sd"> &gt;&gt;&gt; metrics = RankingMetrics(predictionAndLabels)</span>
<span class="sd"> &gt;&gt;&gt; metrics.precisionAt(1)</span>
<span class="sd"> 0.33...</span>
<span class="sd"> &gt;&gt;&gt; metrics.precisionAt(5)</span>
<span class="sd"> 0.26...</span>
<span class="sd"> &gt;&gt;&gt; metrics.precisionAt(15)</span>
<span class="sd"> 0.17...</span>
<span class="sd"> &gt;&gt;&gt; metrics.meanAveragePrecision</span>
<span class="sd"> 0.35...</span>
<span class="sd"> &gt;&gt;&gt; metrics.meanAveragePrecisionAt(1)</span>
<span class="sd"> 0.3333333333333333...</span>
<span class="sd"> &gt;&gt;&gt; metrics.meanAveragePrecisionAt(2)</span>
<span class="sd"> 0.25...</span>
<span class="sd"> &gt;&gt;&gt; metrics.ndcgAt(3)</span>
<span class="sd"> 0.33...</span>
<span class="sd"> &gt;&gt;&gt; metrics.ndcgAt(10)</span>
<span class="sd"> 0.48...</span>
<span class="sd"> &gt;&gt;&gt; metrics.recallAt(1)</span>
<span class="sd"> 0.06...</span>
<span class="sd"> &gt;&gt;&gt; metrics.recallAt(5)</span>
<span class="sd"> 0.35...</span>
<span class="sd"> &gt;&gt;&gt; metrics.recallAt(15)</span>
<span class="sd"> 0.66...</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">predictionAndLabels</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span>
<span class="n">RDD</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">T</span><span class="p">],</span> <span class="n">List</span><span class="p">[</span><span class="n">T</span><span class="p">]]],</span> <span class="n">RDD</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">T</span><span class="p">],</span> <span class="n">List</span><span class="p">[</span><span class="n">T</span><span class="p">],</span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]]</span>
<span class="p">],</span>
<span class="p">):</span>
<span class="n">sc</span> <span class="o">=</span> <span class="n">predictionAndLabels</span><span class="o">.</span><span class="n">ctx</span>
<span class="n">sql_ctx</span> <span class="o">=</span> <span class="n">SQLContext</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">(</span><span class="n">sc</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">sql_ctx</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span>
<span class="n">predictionAndLabels</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="n">sql_ctx</span><span class="o">.</span><span class="n">_inferSchema</span><span class="p">(</span><span class="n">predictionAndLabels</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">callMLlibFunc</span><span class="p">(</span><span class="s2">&quot;newRankingMetrics&quot;</span><span class="p">,</span> <span class="n">df</span><span class="o">.</span><span class="n">_jdf</span><span class="p">)</span>
<span class="nb">super</span><span class="p">(</span><span class="n">RankingMetrics</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span>
<div class="viewcode-block" id="RankingMetrics.precisionAt"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.RankingMetrics.html#pyspark.mllib.evaluation.RankingMetrics.precisionAt">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">precisionAt</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">k</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compute the average precision of all the queries, truncated at ranking position k.</span>
<span class="sd"> If for a query, the ranking algorithm returns n (n &lt; k) results, the precision value</span>
<span class="sd"> will be computed as #(relevant items retrieved) / k. This formula also applies when</span>
<span class="sd"> the size of the ground truth set is less than k.</span>
<span class="sd"> If a query has an empty ground truth set, zero will be used as precision together</span>
<span class="sd"> with a log warning.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;precisionAt&quot;</span><span class="p">,</span> <span class="nb">int</span><span class="p">(</span><span class="n">k</span><span class="p">))</span></div>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">meanAveragePrecision</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the mean average precision (MAP) of all the queries.</span>
<span class="sd"> If a query has an empty ground truth set, the average precision will be zero and</span>
<span class="sd"> a log warning is generated.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;meanAveragePrecision&quot;</span><span class="p">)</span>
<div class="viewcode-block" id="RankingMetrics.meanAveragePrecisionAt"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.RankingMetrics.html#pyspark.mllib.evaluation.RankingMetrics.meanAveragePrecisionAt">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;3.0.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">meanAveragePrecisionAt</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">k</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the mean average precision (MAP) at first k ranking of all the queries.</span>
<span class="sd"> If a query has an empty ground truth set, the average precision will be zero and</span>
<span class="sd"> a log warning is generated.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;meanAveragePrecisionAt&quot;</span><span class="p">,</span> <span class="nb">int</span><span class="p">(</span><span class="n">k</span><span class="p">))</span></div>
<div class="viewcode-block" id="RankingMetrics.ndcgAt"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.RankingMetrics.html#pyspark.mllib.evaluation.RankingMetrics.ndcgAt">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">ndcgAt</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">k</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compute the average NDCG value of all the queries, truncated at ranking position k.</span>
<span class="sd"> The discounted cumulative gain at position k is computed as:</span>
<span class="sd"> sum_{i=1}^{k} (2^{relevance of i-th item} - 1) / log(i + 1),</span>
<span class="sd"> and the NDCG is obtained by dividing the DCG value on the ground truth set.</span>
<span class="sd"> The relevance value is binary when no relevance values are provided with the ground truth set.</span>
<span class="sd"> If a query has an empty ground truth set, zero will be used as NDCG together with</span>
<span class="sd"> a log warning.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;ndcgAt&quot;</span><span class="p">,</span> <span class="nb">int</span><span class="p">(</span><span class="n">k</span><span class="p">))</span></div>
<div class="viewcode-block" id="RankingMetrics.recallAt"><a class="viewcode-back" href="../../../reference/api/pyspark.mllib.evaluation.RankingMetrics.html#pyspark.mllib.evaluation.RankingMetrics.recallAt">[docs]</a> <span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;3.0.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">recallAt</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">k</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compute the average recall of all the queries, truncated at ranking position k.</span>
<span class="sd"> If for a query, the ranking algorithm returns n results, the recall value</span>
<span class="sd"> will be computed as #(relevant items retrieved) / #(ground truth set).</span>
<span class="sd"> This formula also applies when the size of the ground truth set is less than k.</span>
<span class="sd"> If a query has an empty ground truth set, zero will be used as recall together</span>
<span class="sd"> with a log warning.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;recallAt&quot;</span><span class="p">,</span> <span class="nb">int</span><span class="p">(</span><span class="n">k</span><span class="p">))</span></div></div>
<span class="k">class</span> <span class="nc">MultilabelMetrics</span><span class="p">(</span><span class="n">JavaModelWrapper</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Evaluator for multilabel classification.</span>
<span class="sd"> .. versionadded:: 1.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> predictionAndLabels : :py:class:`pyspark.RDD`</span>
<span class="sd"> an RDD of (predictions, labels) pairs,</span>
<span class="sd"> both are non-null Arrays, each with unique elements.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; predictionAndLabels = sc.parallelize([([0.0, 1.0], [0.0, 2.0]), ([0.0, 2.0], [0.0, 1.0]),</span>
<span class="sd"> ... ([], [0.0]), ([2.0], [2.0]), ([2.0, 0.0], [2.0, 0.0]),</span>
<span class="sd"> ... ([0.0, 1.0, 2.0], [0.0, 1.0]), ([1.0], [1.0, 2.0])])</span>
<span class="sd"> &gt;&gt;&gt; metrics = MultilabelMetrics(predictionAndLabels)</span>
<span class="sd"> &gt;&gt;&gt; metrics.precision(0.0)</span>
<span class="sd"> 1.0</span>
<span class="sd"> &gt;&gt;&gt; metrics.recall(1.0)</span>
<span class="sd"> 0.66...</span>
<span class="sd"> &gt;&gt;&gt; metrics.f1Measure(2.0)</span>
<span class="sd"> 0.5</span>
<span class="sd"> &gt;&gt;&gt; metrics.precision()</span>
<span class="sd"> 0.66...</span>
<span class="sd"> &gt;&gt;&gt; metrics.recall()</span>
<span class="sd"> 0.64...</span>
<span class="sd"> &gt;&gt;&gt; metrics.f1Measure()</span>
<span class="sd"> 0.63...</span>
<span class="sd"> &gt;&gt;&gt; metrics.microPrecision</span>
<span class="sd"> 0.72...</span>
<span class="sd"> &gt;&gt;&gt; metrics.microRecall</span>
<span class="sd"> 0.66...</span>
<span class="sd"> &gt;&gt;&gt; metrics.microF1Measure</span>
<span class="sd"> 0.69...</span>
<span class="sd"> &gt;&gt;&gt; metrics.hammingLoss</span>
<span class="sd"> 0.33...</span>
<span class="sd"> &gt;&gt;&gt; metrics.subsetAccuracy</span>
<span class="sd"> 0.28...</span>
<span class="sd"> &gt;&gt;&gt; metrics.accuracy</span>
<span class="sd"> 0.54...</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">predictionAndLabels</span><span class="p">:</span> <span class="n">RDD</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">],</span> <span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]]):</span>
<span class="n">sc</span> <span class="o">=</span> <span class="n">predictionAndLabels</span><span class="o">.</span><span class="n">ctx</span>
<span class="n">sql_ctx</span> <span class="o">=</span> <span class="n">SQLContext</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">(</span><span class="n">sc</span><span class="p">)</span>
<span class="n">df</span> <span class="o">=</span> <span class="n">sql_ctx</span><span class="o">.</span><span class="n">createDataFrame</span><span class="p">(</span>
<span class="n">predictionAndLabels</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="n">sql_ctx</span><span class="o">.</span><span class="n">_inferSchema</span><span class="p">(</span><span class="n">predictionAndLabels</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">assert</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="n">java_class</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jvm</span><span class="o">.</span><span class="n">org</span><span class="o">.</span><span class="n">apache</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="n">MultilabelMetrics</span>
<span class="n">java_model</span> <span class="o">=</span> <span class="n">java_class</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">_jdf</span><span class="p">)</span>
<span class="nb">super</span><span class="p">(</span><span class="n">MultilabelMetrics</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">java_model</span><span class="p">)</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">precision</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">label</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns precision or precision for a given label (category) if specified.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">label</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;precision&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;precision&quot;</span><span class="p">,</span> <span class="nb">float</span><span class="p">(</span><span class="n">label</span><span class="p">))</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">recall</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">label</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns recall or recall for a given label (category) if specified.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">label</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;recall&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;recall&quot;</span><span class="p">,</span> <span class="nb">float</span><span class="p">(</span><span class="n">label</span><span class="p">))</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">f1Measure</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">label</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns f1Measure or f1Measure for a given label (category) if specified.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">label</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;f1Measure&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;f1Measure&quot;</span><span class="p">,</span> <span class="nb">float</span><span class="p">(</span><span class="n">label</span><span class="p">))</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">microPrecision</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns micro-averaged label-based precision.</span>
<span class="sd"> (equal to micro-averaged document-based precision)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;microPrecision&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">microRecall</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns micro-averaged label-based recall.</span>
<span class="sd"> (equal to micro-averaged document-based recall)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;microRecall&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">microF1Measure</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns micro-averaged label-based f1-measure.</span>
<span class="sd"> (equal to micro-averaged document-based f1-measure)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;microF1Measure&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">hammingLoss</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns Hamming-loss.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;hammingLoss&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">subsetAccuracy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns subset accuracy.</span>
<span class="sd"> (for equal sets of labels)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;subsetAccuracy&quot;</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="nd">@since</span><span class="p">(</span><span class="s2">&quot;1.4.0&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">accuracy</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns accuracy.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">call</span><span class="p">(</span><span class="s2">&quot;accuracy&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_test</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">doctest</span>
<span class="kn">import</span> <span class="nn">numpy</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span>
<span class="kn">import</span> <span class="nn">pyspark.mllib.evaluation</span>
<span class="k">try</span><span class="p">:</span>
<span class="c1"># Numpy 1.14+ changed its string format.</span>
<span class="n">numpy</span><span class="o">.</span><span class="n">set_printoptions</span><span class="p">(</span><span class="n">legacy</span><span class="o">=</span><span class="s2">&quot;1.13&quot;</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">TypeError</span><span class="p">:</span>
<span class="k">pass</span>
<span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">mllib</span><span class="o">.</span><span class="n">evaluation</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">spark</span> <span class="o">=</span> <span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">&quot;local[4]&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">&quot;mllib.evaluation tests&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span>
<span class="n">globs</span><span class="p">[</span><span class="s2">&quot;sc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">spark</span><span class="o">.</span><span class="n">sparkContext</span>
<span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span><span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span> <span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span><span class="p">)</span>
<span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span>
<span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span>
<span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="n">_test</span><span class="p">()</span>
</pre></div>
</div>
<!-- Previous / next buttons -->
<div class='prev-next-area'>
</div>
</main>
</div>
</div>
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<div class="footer-item">
<p class="copyright">
&copy; Copyright .<br>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br>
</p>
</div>
</div>
</footer>
</body>
</html>