Source code for pyspark.pandas.window

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
<span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span>
<span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">partial</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">Generic</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Optional</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">Window</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">F</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.missing.window</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">MissingPandasLikeRolling</span><span class="p">,</span>
<span class="n">MissingPandasLikeRollingGroupby</span><span class="p">,</span>
<span class="n">MissingPandasLikeExpanding</span><span class="p">,</span>
<span class="n">MissingPandasLikeExpandingGroupby</span><span class="p">,</span>
<span class="n">MissingPandasLikeExponentialMoving</span><span class="p">,</span>
<span class="n">MissingPandasLikeExponentialMovingGroupby</span><span class="p">,</span>
<span class="p">)</span>
<span class="c1"># For running doctests and reference resolution in PyCharm.</span>
<span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">pandas</span> <span class="k">as</span> <span class="n">ps</span> <span class="c1"># noqa: F401</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas._typing</span> <span class="kn">import</span> <span class="n">FrameLike</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.groupby</span> <span class="kn">import</span> <span class="n">GroupBy</span><span class="p">,</span> <span class="n">DataFrameGroupBy</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.internal</span> <span class="kn">import</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">,</span> <span class="n">SPARK_INDEX_NAME_FORMAT</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.spark</span> <span class="kn">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">SF</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.utils</span> <span class="kn">import</span> <span class="n">scol_for</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.column</span> <span class="kn">import</span> <span class="n">Column</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">DoubleType</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.window</span> <span class="kn">import</span> <span class="n">WindowSpec</span>
<span class="k">class</span> <span class="nc">RollingAndExpanding</span><span class="p">(</span><span class="n">Generic</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">],</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">window</span><span class="p">:</span> <span class="n">WindowSpec</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_window</span> <span class="o">=</span> <span class="n">window</span>
<span class="c1"># This unbounded Window is later used to handle &#39;min_periods&#39; for now.</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span> <span class="o">=</span> <span class="n">min_periods</span>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">_apply_as_series_or_frame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Column</span><span class="p">],</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Wraps a function that handles Spark column in order</span>
<span class="sd"> to support it in both pandas-on-Spark Series and DataFrame.</span>
<span class="sd"> Note that the given `func` name should be same as the API&#39;s method name.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">pass</span>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">pass</span>
<span class="k">def</span> <span class="nf">sum</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">sum</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="nb">sum</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">min</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">min</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="nb">min</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">max</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">max</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="nb">max</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">mean</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">q</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10000</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">percentile_approx</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">DoubleType</span><span class="p">()),</span> <span class="n">q</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">quantile</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">stddev</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">std</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">var</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">var</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">variance</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">var</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">skew</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">skew</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">SF</span><span class="o">.</span><span class="n">skew</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">skew</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">kurt</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">kurt</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">SF</span><span class="o">.</span><span class="n">kurt</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">kurt</span><span class="p">)</span>
<span class="k">class</span> <span class="nc">RollingLike</span><span class="p">(</span><span class="n">RollingAndExpanding</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">window</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
<span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">):</span>
<span class="k">if</span> <span class="n">window</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;window must be &gt;= 0&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="p">(</span><span class="n">min_periods</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="n">min_periods</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;min_periods must be &gt;= 0&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">min_periods</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># TODO: &#39;min_periods&#39; is not equivalent in pandas because it does not count NA as</span>
<span class="c1"># a value.</span>
<span class="n">min_periods</span> <span class="o">=</span> <span class="n">window</span>
<span class="n">window_spec</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span> <span class="o">-</span> <span class="p">(</span><span class="n">window</span> <span class="o">-</span> <span class="mi">1</span><span class="p">),</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">window_spec</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">count</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="k">class</span> <span class="nc">Rolling</span><span class="p">(</span><span class="n">RollingLike</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">psdf_or_psser</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span>
<span class="n">window</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
<span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">):</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.frame</span> <span class="kn">import</span> <span class="n">DataFrame</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">Series</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">window</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf_or_psser</span> <span class="o">=</span> <span class="n">psdf_or_psser</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psdf_or_psser</span><span class="p">,</span> <span class="p">(</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">Series</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;psdf_or_psser must be a series or dataframe; however, got: </span><span class="si">%s</span><span class="s2">&quot;</span>
<span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">psdf_or_psser</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeRolling</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeRolling</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_apply_as_series_or_frame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Column</span><span class="p">],</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf_or_psser</span><span class="o">.</span><span class="n">_apply_series_op</span><span class="p">(</span>
<span class="k">lambda</span> <span class="n">psser</span><span class="p">:</span> <span class="n">psser</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">func</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)),</span> <span class="c1"># TODO: dtype?</span>
<span class="n">should_resolve</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Rolling.count"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Rolling.count.html#pyspark.pandas.window.Rolling.count">[docs]</a> <span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The rolling count of any non-NaN observations inside the window.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Return type is the same as the original object with `np.float64` dtype.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.count : Count of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.count : Count of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 3, float(&quot;nan&quot;), 10])</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(1).count()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 0.0</span>
<span class="sd"> 3 1.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).count()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.to_frame().rolling(1).count()</span>
<span class="sd"> 0</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 0.0</span>
<span class="sd"> 3 1.0</span>
<span class="sd"> &gt;&gt;&gt; s.to_frame().rolling(3).count()</span>
<span class="sd"> 0</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">count</span><span class="p">()</span></div>
<div class="viewcode-block" id="Rolling.sum"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Rolling.sum.html#pyspark.pandas.window.Rolling.sum">[docs]</a> <span class="k">def</span> <span class="nf">sum</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate rolling summation of given DataFrame or Series.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Same type as the input, with the same index, containing the</span>
<span class="sd"> rolling summation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.sum : Reducing sum for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.sum : Reducing sum for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 3</span>
<span class="sd"> 2 5</span>
<span class="sd"> 3 2</span>
<span class="sd"> 4 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(2).sum()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 7.0</span>
<span class="sd"> 2 8.0</span>
<span class="sd"> 3 7.0</span>
<span class="sd"> 4 8.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).sum()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 12.0</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> 4 13.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling summation is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> A B</span>
<span class="sd"> 0 4 16</span>
<span class="sd"> 1 3 9</span>
<span class="sd"> 2 5 25</span>
<span class="sd"> 3 2 4</span>
<span class="sd"> 4 6 36</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).sum()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 7.0 25.0</span>
<span class="sd"> 2 8.0 34.0</span>
<span class="sd"> 3 7.0 29.0</span>
<span class="sd"> 4 8.0 40.0</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(3).sum()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 12.0 50.0</span>
<span class="sd"> 3 10.0 38.0</span>
<span class="sd"> 4 13.0 65.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span></div>
<div class="viewcode-block" id="Rolling.min"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Rolling.min.html#pyspark.pandas.window.Rolling.min">[docs]</a> <span class="k">def</span> <span class="nf">min</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the rolling minimum.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with a Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with a DataFrame.</span>
<span class="sd"> pyspark.pandas.Series.min : Similar method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.min : Similar method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 3</span>
<span class="sd"> 2 5</span>
<span class="sd"> 3 2</span>
<span class="sd"> 4 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(2).min()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 3.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).min()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling minimum is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> A B</span>
<span class="sd"> 0 4 16</span>
<span class="sd"> 1 3 9</span>
<span class="sd"> 2 5 25</span>
<span class="sd"> 3 2 4</span>
<span class="sd"> 4 6 36</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).min()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 3.0 9.0</span>
<span class="sd"> 2 3.0 9.0</span>
<span class="sd"> 3 2.0 4.0</span>
<span class="sd"> 4 2.0 4.0</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(3).min()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 3.0 9.0</span>
<span class="sd"> 3 2.0 4.0</span>
<span class="sd"> 4 2.0 4.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">min</span><span class="p">()</span></div>
<div class="viewcode-block" id="Rolling.max"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Rolling.max.html#pyspark.pandas.window.Rolling.max">[docs]</a> <span class="k">def</span> <span class="nf">max</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the rolling maximum.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Return type is determined by the caller.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Series rolling.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : DataFrame rolling.</span>
<span class="sd"> pyspark.pandas.Series.max : Similar method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.max : Similar method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 3</span>
<span class="sd"> 2 5</span>
<span class="sd"> 3 2</span>
<span class="sd"> 4 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(2).max()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 2 5.0</span>
<span class="sd"> 3 5.0</span>
<span class="sd"> 4 6.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).max()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 5.0</span>
<span class="sd"> 3 5.0</span>
<span class="sd"> 4 6.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling maximum is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> A B</span>
<span class="sd"> 0 4 16</span>
<span class="sd"> 1 3 9</span>
<span class="sd"> 2 5 25</span>
<span class="sd"> 3 2 4</span>
<span class="sd"> 4 6 36</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).max()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 4.0 16.0</span>
<span class="sd"> 2 5.0 25.0</span>
<span class="sd"> 3 5.0 25.0</span>
<span class="sd"> 4 6.0 36.0</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(3).max()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 5.0 25.0</span>
<span class="sd"> 3 5.0 25.0</span>
<span class="sd"> 4 6.0 36.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span></div>
<div class="viewcode-block" id="Rolling.mean"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Rolling.mean.html#pyspark.pandas.window.Rolling.mean">[docs]</a> <span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the rolling mean of the values.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.mean : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.mean : Equivalent method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 3</span>
<span class="sd"> 2 5</span>
<span class="sd"> 3 2</span>
<span class="sd"> 4 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(2).mean()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 3.5</span>
<span class="sd"> 2 4.0</span>
<span class="sd"> 3 3.5</span>
<span class="sd"> 4 4.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).mean()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 4.000000</span>
<span class="sd"> 3 3.333333</span>
<span class="sd"> 4 4.333333</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling mean is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> A B</span>
<span class="sd"> 0 4 16</span>
<span class="sd"> 1 3 9</span>
<span class="sd"> 2 5 25</span>
<span class="sd"> 3 2 4</span>
<span class="sd"> 4 6 36</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).mean()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 3.5 12.5</span>
<span class="sd"> 2 4.0 17.0</span>
<span class="sd"> 3 3.5 14.5</span>
<span class="sd"> 4 4.0 20.0</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(3).mean()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 4.000000 16.666667</span>
<span class="sd"> 3 3.333333 12.666667</span>
<span class="sd"> 4 4.333333 21.666667</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<div class="viewcode-block" id="Rolling.quantile"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Rolling.quantile.html#pyspark.pandas.window.Rolling.quantile">[docs]</a> <span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">quantile</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10000</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the rolling quantile of the values.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> quantile : float</span>
<span class="sd"> Value between 0 and 1 providing the quantile to compute.</span>
<span class="sd"> accuracy : int, optional</span>
<span class="sd"> Default accuracy of approximation. Larger value means better accuracy.</span>
<span class="sd"> The relative error can be deduced by 1.0 / accuracy.</span>
<span class="sd"> This is a panda-on-Spark specific parameter.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> `quantile` in pandas-on-Spark are using distributed percentile approximation</span>
<span class="sd"> algorithm unlike pandas, the result might be different with pandas, also `interpolation`</span>
<span class="sd"> parameter is not supported yet.</span>
<span class="sd"> the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling rolling with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling rolling with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.quantile : Aggregating quantile for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.quantile : Aggregating quantile for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 3</span>
<span class="sd"> 2 5</span>
<span class="sd"> 3 2</span>
<span class="sd"> 4 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(2).quantile(0.5)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 3.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).quantile(0.5)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 4.0</span>
<span class="sd"> 3 3.0</span>
<span class="sd"> 4 5.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling quantile is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> A B</span>
<span class="sd"> 0 4 16</span>
<span class="sd"> 1 3 9</span>
<span class="sd"> 2 5 25</span>
<span class="sd"> 3 2 4</span>
<span class="sd"> 4 6 36</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).quantile(0.5)</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 3.0 9.0</span>
<span class="sd"> 2 3.0 9.0</span>
<span class="sd"> 3 2.0 4.0</span>
<span class="sd"> 4 2.0 4.0</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(3).quantile(0.5)</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 4.0 16.0</span>
<span class="sd"> 3 3.0 9.0</span>
<span class="sd"> 4 5.0 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="n">quantile</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate rolling standard deviation.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).std()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 0.577350</span>
<span class="sd"> 3 1.000000</span>
<span class="sd"> 4 1.000000</span>
<span class="sd"> 5 1.154701</span>
<span class="sd"> 6 0.000000</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling standard deviation is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).std()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 0.000000 0.000000</span>
<span class="sd"> 2 0.707107 7.778175</span>
<span class="sd"> 3 0.707107 9.192388</span>
<span class="sd"> 4 1.414214 16.970563</span>
<span class="sd"> 5 0.000000 0.000000</span>
<span class="sd"> 6 0.000000 0.000000</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">std</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">var</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased rolling variance.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.rolling : Calling object with Series data.</span>
<span class="sd"> DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> Series.var : Equivalent method for Series.</span>
<span class="sd"> DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).var()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 0.333333</span>
<span class="sd"> 3 1.000000</span>
<span class="sd"> 4 1.000000</span>
<span class="sd"> 5 1.333333</span>
<span class="sd"> 6 0.000000</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each unbiased rolling variance is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).var()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 0.0 0.0</span>
<span class="sd"> 2 0.5 60.5</span>
<span class="sd"> 3 0.5 84.5</span>
<span class="sd"> 4 2.0 288.0</span>
<span class="sd"> 5 0.0 0.0</span>
<span class="sd"> 6 0.0 0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">var</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">skew</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased rolling skew.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 1, 5, 9])</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).skew()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 1.732051</span>
<span class="sd"> 3 0.000000</span>
<span class="sd"> 4 0.000000</span>
<span class="sd"> 5 -0.935220</span>
<span class="sd"> 6 -1.732051</span>
<span class="sd"> 7 0.000000</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling standard deviation is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(5).skew()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 NaN NaN</span>
<span class="sd"> 3 NaN NaN</span>
<span class="sd"> 4 1.257788 1.369456</span>
<span class="sd"> 5 -1.492685 -0.526039</span>
<span class="sd"> 6 -1.492685 -0.526039</span>
<span class="sd"> 7 -0.551618 0.686072</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">skew</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">kurt</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased rolling kurtosis.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 1, 5, 9])</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(4).kurt()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> 3 -1.289256</span>
<span class="sd"> 4 -1.289256</span>
<span class="sd"> 5 2.234867</span>
<span class="sd"> 6 2.227147</span>
<span class="sd"> 7 1.500000</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each unbiased rolling variance is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(5).kurt()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 NaN NaN</span>
<span class="sd"> 3 NaN NaN</span>
<span class="sd"> 4 0.312500 0.906336</span>
<span class="sd"> 5 2.818047 1.016942</span>
<span class="sd"> 6 2.818047 1.016942</span>
<span class="sd"> 7 0.867769 0.389750</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">kurt</span><span class="p">()</span>
<span class="k">class</span> <span class="nc">RollingGroupby</span><span class="p">(</span><span class="n">RollingLike</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">groupby</span><span class="p">:</span> <span class="n">GroupBy</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">],</span>
<span class="n">window</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
<span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">window</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_groupby</span> <span class="o">=</span> <span class="n">groupby</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_window</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">ser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">ser</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span>
<span class="o">*</span><span class="p">[</span><span class="n">ser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">ser</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">]</span>
<span class="p">)</span>
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeRollingGroupby</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeRollingGroupby</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_apply_as_series_or_frame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Column</span><span class="p">],</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Wraps a function that handles Spark column in order</span>
<span class="sd"> to support it in both pandas-on-Spark Series and DataFrame.</span>
<span class="sd"> Note that the given `func` name should be same as the API&#39;s method name.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas</span> <span class="kn">import</span> <span class="n">DataFrame</span>
<span class="n">groupby</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_groupby</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_psdf</span>
<span class="c1"># Here we need to include grouped key as an index, and shift previous index.</span>
<span class="c1"># [index_column0, index_column1] -&gt; [grouped key, index_column0, index_column1]</span>
<span class="n">new_index_scols</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Column</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">new_index_spark_column_names</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">new_index_names</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">new_index_fields</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">groupkey</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">:</span>
<span class="n">index_column_name</span> <span class="o">=</span> <span class="n">SPARK_INDEX_NAME_FORMAT</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">new_index_scols</span><span class="p">))</span>
<span class="n">new_index_scols</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">groupkey</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">index_column_name</span><span class="p">))</span>
<span class="n">new_index_spark_column_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index_column_name</span><span class="p">)</span>
<span class="n">new_index_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">groupkey</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">new_index_fields</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">groupkey</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">index_column_name</span><span class="p">))</span>
<span class="k">for</span> <span class="n">new_index_scol</span><span class="p">,</span> <span class="n">index_name</span><span class="p">,</span> <span class="n">index_field</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">,</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_names</span><span class="p">,</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">,</span>
<span class="p">):</span>
<span class="n">index_column_name</span> <span class="o">=</span> <span class="n">SPARK_INDEX_NAME_FORMAT</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">new_index_scols</span><span class="p">))</span>
<span class="n">new_index_scols</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">new_index_scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">index_column_name</span><span class="p">))</span>
<span class="n">new_index_spark_column_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index_column_name</span><span class="p">)</span>
<span class="n">new_index_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index_name</span><span class="p">)</span>
<span class="n">new_index_fields</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index_field</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">index_column_name</span><span class="p">))</span>
<span class="k">if</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_agg_columns_selected</span><span class="p">:</span>
<span class="n">agg_columns</span> <span class="o">=</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_agg_columns</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># pandas doesn&#39;t keep the groupkey as a column from 1.3 for DataFrameGroupBy</span>
<span class="n">column_labels_to_exclude</span> <span class="o">=</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_column_labels_to_exclude</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">groupby</span><span class="p">,</span> <span class="n">DataFrameGroupBy</span><span class="p">):</span>
<span class="k">for</span> <span class="n">groupkey</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">:</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="n">column_labels_to_exclude</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">groupkey</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="n">agg_columns</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">label</span><span class="p">)</span>
<span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span>
<span class="k">if</span> <span class="n">label</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">column_labels_to_exclude</span>
<span class="p">]</span>
<span class="n">applied</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">agg_column</span> <span class="ow">in</span> <span class="n">agg_columns</span><span class="p">:</span>
<span class="n">applied</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">agg_column</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">func</span><span class="p">(</span><span class="n">agg_column</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)))</span> <span class="c1"># TODO: dtype?</span>
<span class="c1"># Seems like pandas filters out when grouped key is NA.</span>
<span class="n">cond</span> <span class="o">=</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">()</span>
<span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">[</span><span class="mi">1</span><span class="p">:]:</span>
<span class="n">cond</span> <span class="o">=</span> <span class="n">cond</span> <span class="o">|</span> <span class="n">c</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">()</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">cond</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="n">new_index_scols</span> <span class="o">+</span> <span class="p">[</span><span class="n">c</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">applied</span><span class="p">]</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">new_index_spark_column_names</span><span class="p">],</span>
<span class="n">index_names</span><span class="o">=</span><span class="n">new_index_names</span><span class="p">,</span>
<span class="n">index_fields</span><span class="o">=</span><span class="n">new_index_fields</span><span class="p">,</span>
<span class="n">column_labels</span><span class="o">=</span><span class="p">[</span><span class="n">c</span><span class="o">.</span><span class="n">_column_label</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">applied</span><span class="p">],</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span>
<span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">c</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">applied</span>
<span class="p">],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="n">c</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">applied</span><span class="p">],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_handle_output</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The rolling count of any non-NaN observations inside the window.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.count : Count of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.count : Count of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).rolling(3).count().sort_index()</span>
<span class="sd"> 2 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 3 2 1.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 1.0</span>
<span class="sd"> 6 2.0</span>
<span class="sd"> 7 3.0</span>
<span class="sd"> 8 3.0</span>
<span class="sd"> 5 9 1.0</span>
<span class="sd"> 10 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling count is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).rolling(2).count().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 3 2 1.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 2.0</span>
<span class="sd"> 4 5 1.0</span>
<span class="sd"> 6 2.0</span>
<span class="sd"> 7 2.0</span>
<span class="sd"> 8 2.0</span>
<span class="sd"> 5 9 1.0</span>
<span class="sd"> 10 2.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">count</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">sum</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The rolling summation of any non-NaN observations inside the window.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.sum : Sum of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.sum : Sum of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).rolling(3).sum().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 12.0</span>
<span class="sd"> 8 12.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling summation is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).rolling(2).sum().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 8.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 18.0</span>
<span class="sd"> 4 18.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 32.0</span>
<span class="sd"> 7 32.0</span>
<span class="sd"> 8 32.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 50.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">min</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The rolling minimum of any non-NaN observations inside the window.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.min : Min of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.min : Min of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).rolling(3).min().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling minimum is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).rolling(2).min().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">min</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">max</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The rolling maximum of any non-NaN observations inside the window.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.max : Max of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.max : Max of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).rolling(3).max().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling maximum is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).rolling(2).max().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The rolling mean of any non-NaN observations inside the window.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.mean : Mean of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.mean : Mean of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).rolling(3).mean().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling mean is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).rolling(2).mean().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">quantile</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10000</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate rolling quantile.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> quantile : float</span>
<span class="sd"> Value between 0 and 1 providing the quantile to compute.</span>
<span class="sd"> accuracy : int, optional</span>
<span class="sd"> Default accuracy of approximation. Larger value means better accuracy.</span>
<span class="sd"> The relative error can be deduced by 1.0 / accuracy.</span>
<span class="sd"> This is a panda-on-Spark specific parameter.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> `quantile` in pandas-on-Spark are using distributed percentile approximation</span>
<span class="sd"> algorithm unlike pandas, the result might be different with pandas, also `interpolation`</span>
<span class="sd"> parameter is not supported yet.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling rolling with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling rolling with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.quantile : Aggregating quantile for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.quantile : Aggregating quantile for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).rolling(3).quantile(0.5).sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling quantile is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).rolling(2).quantile(0.5).sort_index()</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="n">quantile</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate rolling standard deviation.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">std</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">var</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased rolling variance.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">var</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">skew</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased rolling skew.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">skew</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">kurt</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased rolling kurtosis.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">kurt</span><span class="p">()</span>
<span class="k">class</span> <span class="nc">ExpandingLike</span><span class="p">(</span><span class="n">RollingAndExpanding</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">):</span>
<span class="k">if</span> <span class="n">min_periods</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;min_periods must be &gt;= 0&quot;</span><span class="p">)</span>
<span class="n">window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">window</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">count</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="k">class</span> <span class="nc">Expanding</span><span class="p">(</span><span class="n">ExpandingLike</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">psdf_or_psser</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">):</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.frame</span> <span class="kn">import</span> <span class="n">DataFrame</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">Series</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">min_periods</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psdf_or_psser</span><span class="p">,</span> <span class="p">(</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">Series</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;psdf_or_psser must be a series or dataframe; however, got: </span><span class="si">%s</span><span class="s2">&quot;</span>
<span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">psdf_or_psser</span><span class="p">)</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf_or_psser</span> <span class="o">=</span> <span class="n">psdf_or_psser</span>
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeExpanding</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeExpanding</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="c1"># TODO: when add &#39;axis&#39; parameter, should add to here too.</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="k">return</span> <span class="s2">&quot;Expanding [min_periods=</span><span class="si">{}</span><span class="s2">]&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">)</span>
<span class="n">_apply_as_series_or_frame</span> <span class="o">=</span> <span class="n">Rolling</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span>
<div class="viewcode-block" id="Expanding.count"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Expanding.count.html#pyspark.pandas.window.Expanding.count">[docs]</a> <span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The expanding count of any non-NaN observations inside the window.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.count : Count of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.count : Count of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 3, float(&quot;nan&quot;), 10])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding().count()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 3.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.to_frame().expanding().count()</span>
<span class="sd"> 0</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">count</span><span class="p">()</span></div>
<div class="viewcode-block" id="Expanding.sum"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Expanding.sum.html#pyspark.pandas.window.Expanding.sum">[docs]</a> <span class="k">def</span> <span class="nf">sum</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate expanding summation of given DataFrame or Series.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Same type as the input, with the same index, containing the</span>
<span class="sd"> expanding summation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.sum : Reducing sum for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.sum : Reducing sum for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4, 5])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> 4 5</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).sum()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 6.0</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> 4 15.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding summation is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> A B</span>
<span class="sd"> 0 1 1</span>
<span class="sd"> 1 2 4</span>
<span class="sd"> 2 3 9</span>
<span class="sd"> 3 4 16</span>
<span class="sd"> 4 5 25</span>
<span class="sd"> &gt;&gt;&gt; df.expanding(3).sum()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 6.0 14.0</span>
<span class="sd"> 3 10.0 30.0</span>
<span class="sd"> 4 15.0 55.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span></div>
<div class="viewcode-block" id="Expanding.min"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Expanding.min.html#pyspark.pandas.window.Expanding.min">[docs]</a> <span class="k">def</span> <span class="nf">min</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding minimum.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with a Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with a DataFrame.</span>
<span class="sd"> pyspark.pandas.Series.min : Similar method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.min : Similar method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Performing a expanding minimum with a window size of 3.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).min()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">min</span><span class="p">()</span></div>
<div class="viewcode-block" id="Expanding.max"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Expanding.max.html#pyspark.pandas.window.Expanding.max">[docs]</a> <span class="k">def</span> <span class="nf">max</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding maximum.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Return type is determined by the caller.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.max : Similar method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.max : Similar method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Performing a expanding minimum with a window size of 3.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).max()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 5.0</span>
<span class="sd"> 3 5.0</span>
<span class="sd"> 4 6.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span></div>
<div class="viewcode-block" id="Expanding.mean"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Expanding.mean.html#pyspark.pandas.window.Expanding.mean">[docs]</a> <span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding mean of the values.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.mean : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.mean : Equivalent method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> The below examples will show expanding mean calculations with window sizes of</span>
<span class="sd"> two and three, respectively.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(2).mean()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 1.5</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 2.5</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).mean()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 2.5</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<div class="viewcode-block" id="Expanding.quantile"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Expanding.quantile.html#pyspark.pandas.window.Expanding.quantile">[docs]</a> <span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">quantile</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10000</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding quantile of the values.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> quantile : float</span>
<span class="sd"> Value between 0 and 1 providing the quantile to compute.</span>
<span class="sd"> accuracy : int, optional</span>
<span class="sd"> Default accuracy of approximation. Larger value means better accuracy.</span>
<span class="sd"> The relative error can be deduced by 1.0 / accuracy.</span>
<span class="sd"> This is a panda-on-Spark specific parameter.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> `quantile` in pandas-on-Spark are using distributed percentile approximation</span>
<span class="sd"> algorithm unlike pandas, the result might be different with pandas (the result is</span>
<span class="sd"> similar to the interpolation set to `lower`), also `interpolation` parameter is</span>
<span class="sd"> not supported yet.</span>
<span class="sd"> the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling expanding with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling expanding with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.quantile : Aggregating quantile for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.quantile : Aggregating quantile for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> The below examples will show expanding quantile calculations with window sizes of</span>
<span class="sd"> two and three, respectively.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(2).quantile(0.5)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).quantile(0.5)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="n">quantile</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate expanding standard deviation.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).std()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 0.577350</span>
<span class="sd"> 3 0.957427</span>
<span class="sd"> 4 0.894427</span>
<span class="sd"> 5 0.836660</span>
<span class="sd"> 6 0.786796</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding standard deviation variance is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.expanding(2).std()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 0.000000 0.000000</span>
<span class="sd"> 2 0.577350 6.350853</span>
<span class="sd"> 3 0.957427 11.412712</span>
<span class="sd"> 4 0.894427 10.630146</span>
<span class="sd"> 5 0.836660 9.928075</span>
<span class="sd"> 6 0.786796 9.327379</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">std</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">var</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased expanding variance.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).var()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 0.333333</span>
<span class="sd"> 3 0.916667</span>
<span class="sd"> 4 0.800000</span>
<span class="sd"> 5 0.700000</span>
<span class="sd"> 6 0.619048</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each unbiased expanding variance is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.expanding(2).var()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 0.000000 0.000000</span>
<span class="sd"> 2 0.333333 40.333333</span>
<span class="sd"> 3 0.916667 130.250000</span>
<span class="sd"> 4 0.800000 113.000000</span>
<span class="sd"> 5 0.700000 98.566667</span>
<span class="sd"> 6 0.619048 87.000000</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">var</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">skew</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased expanding skew.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 1, 5, 9])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).skew()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 1.732051</span>
<span class="sd"> 3 0.854563</span>
<span class="sd"> 4 1.257788</span>
<span class="sd"> 5 -1.571593</span>
<span class="sd"> 6 -1.657542</span>
<span class="sd"> 7 -0.521760</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding standard deviation variance is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.expanding(5).skew()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 NaN NaN</span>
<span class="sd"> 3 NaN NaN</span>
<span class="sd"> 4 1.257788 1.369456</span>
<span class="sd"> 5 -1.571593 -0.423309</span>
<span class="sd"> 6 -1.657542 -0.355737</span>
<span class="sd"> 7 -0.521760 1.116874</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">skew</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">kurt</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased expanding kurtosis.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 1, 5, 9])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(4).kurt()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> 3 -1.289256</span>
<span class="sd"> 4 0.312500</span>
<span class="sd"> 5 3.419520</span>
<span class="sd"> 6 4.028185</span>
<span class="sd"> 7 2.230373</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each unbiased expanding variance is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.expanding(5).kurt()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 NaN NaN</span>
<span class="sd"> 3 NaN NaN</span>
<span class="sd"> 4 0.312500 0.906336</span>
<span class="sd"> 5 3.419520 1.486581</span>
<span class="sd"> 6 4.028185 1.936169</span>
<span class="sd"> 7 2.230373 2.273792</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">kurt</span><span class="p">()</span>
<span class="k">class</span> <span class="nc">ExpandingGroupby</span><span class="p">(</span><span class="n">ExpandingLike</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">groupby</span><span class="p">:</span> <span class="n">GroupBy</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">],</span> <span class="n">min_periods</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">min_periods</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_groupby</span> <span class="o">=</span> <span class="n">groupby</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_window</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">ser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">ser</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span>
<span class="o">*</span><span class="p">[</span><span class="n">ser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">ser</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">]</span>
<span class="p">)</span>
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeExpandingGroupby</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeExpandingGroupby</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="n">_apply_as_series_or_frame</span> <span class="o">=</span> <span class="n">RollingGroupby</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The expanding count of any non-NaN observations inside the window.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.count : Count of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.count : Count of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).expanding(3).count().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 3.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding count is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).expanding(2).count().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 2.0</span>
<span class="sd"> 7 3.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 2.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">count</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">sum</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate expanding summation of given DataFrame or Series.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Same type as the input, with the same index, containing the</span>
<span class="sd"> expanding summation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.sum : Reducing sum for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.sum : Reducing sum for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).expanding(3).sum().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 12.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding summation is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).expanding(2).sum().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 8.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 18.0</span>
<span class="sd"> 4 27.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 32.0</span>
<span class="sd"> 7 48.0</span>
<span class="sd"> 8 64.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 50.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">min</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding minimum.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with a Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with a DataFrame.</span>
<span class="sd"> pyspark.pandas.Series.min : Similar method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.min : Similar method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).expanding(3).min().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding minimum is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).expanding(2).min().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">min</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">max</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding maximum.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Return type is determined by the caller.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.max : Similar method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.max : Similar method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).expanding(3).max().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding maximum is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).expanding(2).max().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding mean of the values.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.mean : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.mean : Equivalent method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).expanding(3).mean().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding mean is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).expanding(2).mean().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">quantile</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10000</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding quantile of the values.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> quantile : float</span>
<span class="sd"> Value between 0 and 1 providing the quantile to compute.</span>
<span class="sd"> accuracy : int, optional</span>
<span class="sd"> Default accuracy of approximation. Larger value means better accuracy.</span>
<span class="sd"> The relative error can be deduced by 1.0 / accuracy.</span>
<span class="sd"> This is a panda-on-Spark specific parameter.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> `quantile` in pandas-on-Spark are using distributed percentile approximation</span>
<span class="sd"> algorithm unlike pandas, the result might be different with pandas, also `interpolation`</span>
<span class="sd"> parameter is not supported yet.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling expanding with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling expanding with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.quantile : Aggregating quantile for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.quantile : Aggregating quantile for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).expanding(3).quantile(0.5).sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding quantile is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).expanding(2).quantile(0.5).sort_index()</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="n">quantile</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate expanding standard deviation.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding: Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">std</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">var</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased expanding variance.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">var</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">skew</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate expanding standard skew.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding: Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">skew</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">kurt</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased expanding kurtosis.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">kurt</span><span class="p">()</span>
<span class="k">class</span> <span class="nc">ExponentialMovingLike</span><span class="p">(</span><span class="n">Generic</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">],</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">window</span><span class="p">:</span> <span class="n">WindowSpec</span><span class="p">,</span>
<span class="n">com</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">span</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">halflife</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">alpha</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignore_na</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">):</span>
<span class="k">if</span> <span class="p">(</span><span class="n">min_periods</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="n">min_periods</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;min_periods must be &gt;= 0&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">min_periods</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">min_periods</span> <span class="o">=</span> <span class="mi">0</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span> <span class="o">=</span> <span class="n">min_periods</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_ignore_na</span> <span class="o">=</span> <span class="n">ignore_na</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_window</span> <span class="o">=</span> <span class="n">window</span>
<span class="c1"># This unbounded Window is later used to handle &#39;min_periods&#39; for now.</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="k">if</span> <span class="p">(</span><span class="n">com</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="ow">not</span> <span class="n">com</span> <span class="o">&gt;=</span> <span class="mi">0</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;com must be &gt;= 0&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_com</span> <span class="o">=</span> <span class="n">com</span>
<span class="k">if</span> <span class="p">(</span><span class="n">span</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="ow">not</span> <span class="n">span</span> <span class="o">&gt;=</span> <span class="mi">1</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;span must be &gt;= 1&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_span</span> <span class="o">=</span> <span class="n">span</span>
<span class="k">if</span> <span class="p">(</span><span class="n">halflife</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="ow">not</span> <span class="n">halflife</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;halflife must be &gt; 0&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_halflife</span> <span class="o">=</span> <span class="n">halflife</span>
<span class="k">if</span> <span class="p">(</span><span class="n">alpha</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="ow">not</span> <span class="mi">0</span> <span class="o">&lt;</span> <span class="n">alpha</span> <span class="o">&lt;=</span> <span class="mi">1</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;alpha must be in (0, 1]&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_alpha</span> <span class="o">=</span> <span class="n">alpha</span>
<span class="k">def</span> <span class="nf">_compute_unified_alpha</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="n">unified_alpha</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="n">opt_count</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_com</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">unified_alpha</span> <span class="o">=</span> <span class="mf">1.0</span> <span class="o">/</span> <span class="p">(</span><span class="mi">1</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">_com</span><span class="p">)</span>
<span class="n">opt_count</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_span</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">unified_alpha</span> <span class="o">=</span> <span class="mf">2.0</span> <span class="o">/</span> <span class="p">(</span><span class="mi">1</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">_span</span><span class="p">)</span>
<span class="n">opt_count</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_halflife</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">unified_alpha</span> <span class="o">=</span> <span class="mf">1.0</span> <span class="o">-</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">_halflife</span><span class="p">)</span>
<span class="n">opt_count</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_alpha</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">unified_alpha</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_alpha</span>
<span class="n">opt_count</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="n">opt_count</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Must pass one of com, span, halflife, or alpha&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">opt_count</span> <span class="o">!=</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;com, span, halflife, and alpha are mutually exclusive&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">unified_alpha</span>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">_apply_as_series_or_frame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Column</span><span class="p">],</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Wraps a function that handles Spark column in order</span>
<span class="sd"> to support it in both pandas-on-Spark Series and DataFrame.</span>
<span class="sd"> Note that the given `func` name should be same as the API&#39;s method name.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">pass</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="n">unified_alpha</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_unified_alpha</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="n">col_ewm</span> <span class="o">=</span> <span class="n">SF</span><span class="o">.</span><span class="n">ewm</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">unified_alpha</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ignore_na</span><span class="p">)</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="o">~</span><span class="n">scol</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span>
<span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">col_ewm</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">mean</span><span class="p">)</span>
<span class="k">class</span> <span class="nc">ExponentialMoving</span><span class="p">(</span><span class="n">ExponentialMovingLike</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">psdf_or_psser</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span>
<span class="n">com</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">span</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">halflife</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">alpha</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignore_na</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">):</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.frame</span> <span class="kn">import</span> <span class="n">DataFrame</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">Series</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psdf_or_psser</span><span class="p">,</span> <span class="p">(</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">Series</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;psdf_or_psser must be a series or dataframe; however, got: </span><span class="si">%s</span><span class="s2">&quot;</span>
<span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">psdf_or_psser</span><span class="p">)</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf_or_psser</span> <span class="o">=</span> <span class="n">psdf_or_psser</span>
<span class="n">window_spec</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">window_spec</span><span class="p">,</span> <span class="n">com</span><span class="p">,</span> <span class="n">span</span><span class="p">,</span> <span class="n">halflife</span><span class="p">,</span> <span class="n">alpha</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">,</span> <span class="n">ignore_na</span><span class="p">)</span>
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeExponentialMoving</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeExponentialMoving</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="n">_apply_as_series_or_frame</span> <span class="o">=</span> <span class="n">Rolling</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span>
<div class="viewcode-block" id="ExponentialMoving.mean"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.ExponentialMoving.mean.html#pyspark.pandas.window.ExponentialMoving.mean">[docs]</a> <span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate an online exponentially weighted mean.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> There are behavior differences between pandas-on-Spark and pandas.</span>
<span class="sd"> * the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the exponentially</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.mean : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.mean : Equivalent method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> The below examples will show computing exponentially weighted moving average.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;s1&#39;: [.2, .0, .6, .2, .4, .5, .6], &#39;s2&#39;: [2, 1, 3, 1, 0, 0, 0]})</span>
<span class="sd"> &gt;&gt;&gt; df.ewm(com=0.1).mean()</span>
<span class="sd"> s1 s2</span>
<span class="sd"> 0 0.200000 2.000000</span>
<span class="sd"> 1 0.016667 1.083333</span>
<span class="sd"> 2 0.547368 2.827068</span>
<span class="sd"> 3 0.231557 1.165984</span>
<span class="sd"> 4 0.384688 0.105992</span>
<span class="sd"> 5 0.489517 0.009636</span>
<span class="sd"> 6 0.589956 0.000876</span>
<span class="sd"> &gt;&gt;&gt; df.s2.ewm(halflife=1.5, min_periods=3).mean()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 2.182572</span>
<span class="sd"> 3 1.663174</span>
<span class="sd"> 4 0.979949</span>
<span class="sd"> 5 0.593155</span>
<span class="sd"> 6 0.364668</span>
<span class="sd"> Name: s2, dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<span class="c1"># TODO: when add &#39;adjust&#39; parameter, should add to here too.</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="k">return</span> <span class="p">(</span>
<span class="s2">&quot;ExponentialMoving [com=</span><span class="si">{}</span><span class="s2">, span=</span><span class="si">{}</span><span class="s2">, halflife=</span><span class="si">{}</span><span class="s2">, alpha=</span><span class="si">{}</span><span class="s2">, &quot;</span>
<span class="s2">&quot;min_periods=</span><span class="si">{}</span><span class="s2">, ignore_na=</span><span class="si">{}</span><span class="s2">]&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_com</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_span</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_halflife</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_alpha</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_ignore_na</span><span class="p">,</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">class</span> <span class="nc">ExponentialMovingGroupby</span><span class="p">(</span><span class="n">ExponentialMovingLike</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">groupby</span><span class="p">:</span> <span class="n">GroupBy</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">],</span>
<span class="n">com</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">span</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">halflife</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">alpha</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignore_na</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">):</span>
<span class="n">window_spec</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">window_spec</span><span class="p">,</span> <span class="n">com</span><span class="p">,</span> <span class="n">span</span><span class="p">,</span> <span class="n">halflife</span><span class="p">,</span> <span class="n">alpha</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">,</span> <span class="n">ignore_na</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_groupby</span> <span class="o">=</span> <span class="n">groupby</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_window</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">ser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">ser</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span>
<span class="o">*</span><span class="p">[</span><span class="n">ser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">ser</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">]</span>
<span class="p">)</span>
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeExponentialMovingGroupby</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeExponentialMovingGroupby</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="n">_apply_as_series_or_frame</span> <span class="o">=</span> <span class="n">RollingGroupby</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate an online exponentially weighted mean.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> There are behavior differences between pandas-on-Spark and pandas.</span>
<span class="sd"> * the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the exponentially</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.mean : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.mean : Equivalent method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).ewm(alpha=0.5).mean().sort_index()</span>
<span class="sd"> 2 0 2.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 3 2 3.0</span>
<span class="sd"> 3 3.0</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 4.0</span>
<span class="sd"> 6 4.0</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 5.0</span>
<span class="sd"> 10 5.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each ewm mean is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).ewm(alpha=0.5).mean().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 4.0</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 9.0</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 16.0</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 25.0</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="c1"># TODO: when add &#39;adjust&#39; parameter, should add to here too.</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="k">return</span> <span class="p">(</span>
<span class="s2">&quot;ExponentialMovingGroupby [com=</span><span class="si">{}</span><span class="s2">, span=</span><span class="si">{}</span><span class="s2">, halflife=</span><span class="si">{}</span><span class="s2">, alpha=</span><span class="si">{}</span><span class="s2">, &quot;</span>
<span class="s2">&quot;min_periods=</span><span class="si">{}</span><span class="s2">, ignore_na=</span><span class="si">{}</span><span class="s2">]&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_com</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_span</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_halflife</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_alpha</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_ignore_na</span><span class="p">,</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">def</span> <span class="nf">_test</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">doctest</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span>
<span class="kn">import</span> <span class="nn">pyspark.pandas.window</span>
<span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s2">&quot;SPARK_HOME&quot;</span><span class="p">])</span>
<span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span><span class="o">.</span><span class="n">window</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">globs</span><span class="p">[</span><span class="s2">&quot;ps&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span>
<span class="n">spark</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">&quot;local[4]&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">&quot;pyspark.pandas.window tests&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span>
<span class="p">)</span>
<span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span>
<span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span><span class="o">.</span><span class="n">window</span><span class="p">,</span>
<span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span>
<span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span> <span class="o">|</span> <span class="n">doctest</span><span class="o">.</span><span class="n">NORMALIZE_WHITESPACE</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span>
<span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span>
<span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="n">_test</span><span class="p">()</span>
</pre></div>
</div>
<!-- Previous / next buttons -->
<div class='prev-next-area'>
</div>
</main>
</div>
</div>
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=1999514e3f237ded88cf"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<div class="footer-item">
<p class="copyright">
&copy; Copyright .<br>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 3.0.4.<br>
</p>
</div>
</div>
</footer>
</body>
</html>