Source code for pyspark.pandas.window (PySpark 4.0.0-preview2 documentation)

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
<span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span>
<span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">partial</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Any</span><span class="p">,</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">Generic</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Optional</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">Window</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">F</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.missing.window</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">MissingPandasLikeRolling</span><span class="p">,</span>
<span class="n">MissingPandasLikeRollingGroupby</span><span class="p">,</span>
<span class="n">MissingPandasLikeExpanding</span><span class="p">,</span>
<span class="n">MissingPandasLikeExpandingGroupby</span><span class="p">,</span>
<span class="n">MissingPandasLikeExponentialMoving</span><span class="p">,</span>
<span class="n">MissingPandasLikeExponentialMovingGroupby</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">pandas</span> <span class="k">as</span> <span class="n">ps</span> <span class="c1"># noqa: F401</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas._typing</span> <span class="kn">import</span> <span class="n">FrameLike</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.groupby</span> <span class="kn">import</span> <span class="n">GroupBy</span><span class="p">,</span> <span class="n">DataFrameGroupBy</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.internal</span> <span class="kn">import</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">,</span> <span class="n">SPARK_INDEX_NAME_FORMAT</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.spark</span> <span class="kn">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">SF</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.utils</span> <span class="kn">import</span> <span class="n">scol_for</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.column</span> <span class="kn">import</span> <span class="n">Column</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">DoubleType</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.window</span> <span class="kn">import</span> <span class="n">WindowSpec</span>
<span class="k">class</span> <span class="nc">RollingAndExpanding</span><span class="p">(</span><span class="n">Generic</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">],</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">window</span><span class="p">:</span> <span class="n">WindowSpec</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_window</span> <span class="o">=</span> <span class="n">window</span>
<span class="c1"># This unbounded Window is later used to handle &#39;min_periods&#39; for now.</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span> <span class="o">=</span> <span class="n">min_periods</span>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">_apply_as_series_or_frame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Column</span><span class="p">],</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Wraps a function that handles Spark column in order</span>
<span class="sd"> to support it in both pandas-on-Spark Series and DataFrame.</span>
<span class="sd"> Note that the given `func` name should be same as the API&#39;s method name.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">pass</span>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">pass</span>
<span class="k">def</span> <span class="nf">sum</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">sum</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="nb">sum</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">min</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">min</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="nb">min</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">max</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">max</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="nb">max</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">mean</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">q</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10000</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">percentile_approx</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">DoubleType</span><span class="p">()),</span> <span class="n">q</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">quantile</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">stddev</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">std</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">var</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">var</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">variance</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">var</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">skew</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">skew</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">SF</span><span class="o">.</span><span class="n">skew</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">skew</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">kurt</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">kurt</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">SF</span><span class="o">.</span><span class="n">kurt</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">kurt</span><span class="p">)</span>
<span class="k">class</span> <span class="nc">RollingLike</span><span class="p">(</span><span class="n">RollingAndExpanding</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">window</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
<span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">):</span>
<span class="k">if</span> <span class="n">window</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;window must be &gt;= 0&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="p">(</span><span class="n">min_periods</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="n">min_periods</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;min_periods must be &gt;= 0&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">min_periods</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># TODO: &#39;min_periods&#39; is not equivalent in pandas because it does not count NA as</span>
<span class="c1"># a value.</span>
<span class="n">min_periods</span> <span class="o">=</span> <span class="n">window</span>
<span class="n">window_spec</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span> <span class="o">-</span> <span class="p">(</span><span class="n">window</span> <span class="o">-</span> <span class="mi">1</span><span class="p">),</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">window_spec</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">count</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="k">class</span> <span class="nc">Rolling</span><span class="p">(</span><span class="n">RollingLike</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">psdf_or_psser</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span>
<span class="n">window</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
<span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">):</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.frame</span> <span class="kn">import</span> <span class="n">DataFrame</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">Series</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">window</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf_or_psser</span> <span class="o">=</span> <span class="n">psdf_or_psser</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psdf_or_psser</span><span class="p">,</span> <span class="p">(</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">Series</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;psdf_or_psser must be a series or dataframe; however, got: </span><span class="si">%s</span><span class="s2">&quot;</span>
<span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">psdf_or_psser</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeRolling</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeRolling</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_apply_as_series_or_frame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Column</span><span class="p">],</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf_or_psser</span><span class="o">.</span><span class="n">_apply_series_op</span><span class="p">(</span>
<span class="k">lambda</span> <span class="n">psser</span><span class="p">:</span> <span class="n">psser</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">func</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)),</span> <span class="c1"># TODO: dtype?</span>
<span class="n">should_resolve</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Rolling.count"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Rolling.count.html#pyspark.pandas.window.Rolling.count">[docs]</a> <span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The rolling count of any non-NaN observations inside the window.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Return type is the same as the original object with `np.float64` dtype.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.count : Count of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.count : Count of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 3, float(&quot;nan&quot;), 10])</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(1).count()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 0.0</span>
<span class="sd"> 3 1.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).count()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.to_frame().rolling(1).count()</span>
<span class="sd"> 0</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 0.0</span>
<span class="sd"> 3 1.0</span>
<span class="sd"> &gt;&gt;&gt; s.to_frame().rolling(3).count()</span>
<span class="sd"> 0</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">count</span><span class="p">()</span></div>
<div class="viewcode-block" id="Rolling.sum"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Rolling.sum.html#pyspark.pandas.window.Rolling.sum">[docs]</a> <span class="k">def</span> <span class="nf">sum</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate rolling summation of given DataFrame or Series.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Same type as the input, with the same index, containing the</span>
<span class="sd"> rolling summation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.sum : Reducing sum for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.sum : Reducing sum for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 3</span>
<span class="sd"> 2 5</span>
<span class="sd"> 3 2</span>
<span class="sd"> 4 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(2).sum()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 7.0</span>
<span class="sd"> 2 8.0</span>
<span class="sd"> 3 7.0</span>
<span class="sd"> 4 8.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).sum()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 12.0</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> 4 13.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling summation is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> A B</span>
<span class="sd"> 0 4 16</span>
<span class="sd"> 1 3 9</span>
<span class="sd"> 2 5 25</span>
<span class="sd"> 3 2 4</span>
<span class="sd"> 4 6 36</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).sum()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 7.0 25.0</span>
<span class="sd"> 2 8.0 34.0</span>
<span class="sd"> 3 7.0 29.0</span>
<span class="sd"> 4 8.0 40.0</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(3).sum()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 12.0 50.0</span>
<span class="sd"> 3 10.0 38.0</span>
<span class="sd"> 4 13.0 65.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span></div>
<div class="viewcode-block" id="Rolling.min"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Rolling.min.html#pyspark.pandas.window.Rolling.min">[docs]</a> <span class="k">def</span> <span class="nf">min</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the rolling minimum.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with a Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with a DataFrame.</span>
<span class="sd"> pyspark.pandas.Series.min : Similar method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.min : Similar method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 3</span>
<span class="sd"> 2 5</span>
<span class="sd"> 3 2</span>
<span class="sd"> 4 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(2).min()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 3.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).min()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling minimum is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> A B</span>
<span class="sd"> 0 4 16</span>
<span class="sd"> 1 3 9</span>
<span class="sd"> 2 5 25</span>
<span class="sd"> 3 2 4</span>
<span class="sd"> 4 6 36</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).min()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 3.0 9.0</span>
<span class="sd"> 2 3.0 9.0</span>
<span class="sd"> 3 2.0 4.0</span>
<span class="sd"> 4 2.0 4.0</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(3).min()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 3.0 9.0</span>
<span class="sd"> 3 2.0 4.0</span>
<span class="sd"> 4 2.0 4.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">min</span><span class="p">()</span></div>
<div class="viewcode-block" id="Rolling.max"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Rolling.max.html#pyspark.pandas.window.Rolling.max">[docs]</a> <span class="k">def</span> <span class="nf">max</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the rolling maximum.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Return type is determined by the caller.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Series rolling.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : DataFrame rolling.</span>
<span class="sd"> pyspark.pandas.Series.max : Similar method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.max : Similar method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 3</span>
<span class="sd"> 2 5</span>
<span class="sd"> 3 2</span>
<span class="sd"> 4 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(2).max()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 2 5.0</span>
<span class="sd"> 3 5.0</span>
<span class="sd"> 4 6.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).max()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 5.0</span>
<span class="sd"> 3 5.0</span>
<span class="sd"> 4 6.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling maximum is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> A B</span>
<span class="sd"> 0 4 16</span>
<span class="sd"> 1 3 9</span>
<span class="sd"> 2 5 25</span>
<span class="sd"> 3 2 4</span>
<span class="sd"> 4 6 36</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).max()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 4.0 16.0</span>
<span class="sd"> 2 5.0 25.0</span>
<span class="sd"> 3 5.0 25.0</span>
<span class="sd"> 4 6.0 36.0</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(3).max()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 5.0 25.0</span>
<span class="sd"> 3 5.0 25.0</span>
<span class="sd"> 4 6.0 36.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span></div>
<div class="viewcode-block" id="Rolling.mean"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Rolling.mean.html#pyspark.pandas.window.Rolling.mean">[docs]</a> <span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the rolling mean of the values.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.mean : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.mean : Equivalent method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 3</span>
<span class="sd"> 2 5</span>
<span class="sd"> 3 2</span>
<span class="sd"> 4 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(2).mean()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 3.5</span>
<span class="sd"> 2 4.0</span>
<span class="sd"> 3 3.5</span>
<span class="sd"> 4 4.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).mean()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 4.000000</span>
<span class="sd"> 3 3.333333</span>
<span class="sd"> 4 4.333333</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling mean is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> A B</span>
<span class="sd"> 0 4 16</span>
<span class="sd"> 1 3 9</span>
<span class="sd"> 2 5 25</span>
<span class="sd"> 3 2 4</span>
<span class="sd"> 4 6 36</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).mean()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 3.5 12.5</span>
<span class="sd"> 2 4.0 17.0</span>
<span class="sd"> 3 3.5 14.5</span>
<span class="sd"> 4 4.0 20.0</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(3).mean()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 4.000000 16.666667</span>
<span class="sd"> 3 3.333333 12.666667</span>
<span class="sd"> 4 4.333333 21.666667</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<div class="viewcode-block" id="Rolling.quantile"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Rolling.quantile.html#pyspark.pandas.window.Rolling.quantile">[docs]</a> <span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">quantile</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10000</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the rolling quantile of the values.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> quantile : float</span>
<span class="sd"> Value between 0 and 1 providing the quantile to compute.</span>
<span class="sd"> .. deprecated:: 4.0.0</span>
<span class="sd"> This will be renamed to ‘q’ in a future version.</span>
<span class="sd"> accuracy : int, optional</span>
<span class="sd"> Default accuracy of approximation. Larger value means better accuracy.</span>
<span class="sd"> The relative error can be deduced by 1.0 / accuracy.</span>
<span class="sd"> This is a panda-on-Spark specific parameter.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> `quantile` in pandas-on-Spark are using distributed percentile approximation</span>
<span class="sd"> algorithm unlike pandas, the result might be different with pandas, also `interpolation`</span>
<span class="sd"> parameter is not supported yet.</span>
<span class="sd"> the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling rolling with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling rolling with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.quantile : Aggregating quantile for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.quantile : Aggregating quantile for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 3</span>
<span class="sd"> 2 5</span>
<span class="sd"> 3 2</span>
<span class="sd"> 4 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(2).quantile(0.5)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 3.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).quantile(0.5)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 4.0</span>
<span class="sd"> 3 3.0</span>
<span class="sd"> 4 5.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling quantile is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> A B</span>
<span class="sd"> 0 4 16</span>
<span class="sd"> 1 3 9</span>
<span class="sd"> 2 5 25</span>
<span class="sd"> 3 2 4</span>
<span class="sd"> 4 6 36</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).quantile(0.5)</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 3.0 9.0</span>
<span class="sd"> 2 3.0 9.0</span>
<span class="sd"> 3 2.0 4.0</span>
<span class="sd"> 4 2.0 4.0</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(3).quantile(0.5)</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 4.0 16.0</span>
<span class="sd"> 3 3.0 9.0</span>
<span class="sd"> 4 5.0 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="n">quantile</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate rolling standard deviation.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).std()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 0.577350</span>
<span class="sd"> 3 1.000000</span>
<span class="sd"> 4 1.000000</span>
<span class="sd"> 5 1.154701</span>
<span class="sd"> 6 0.000000</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling standard deviation is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).std()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 0.000000 0.000000</span>
<span class="sd"> 2 0.707107 7.778175</span>
<span class="sd"> 3 0.707107 9.192388</span>
<span class="sd"> 4 1.414214 16.970563</span>
<span class="sd"> 5 0.000000 0.000000</span>
<span class="sd"> 6 0.000000 0.000000</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">std</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">var</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased rolling variance.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.rolling : Calling object with Series data.</span>
<span class="sd"> DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> Series.var : Equivalent method for Series.</span>
<span class="sd"> DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).var()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 0.333333</span>
<span class="sd"> 3 1.000000</span>
<span class="sd"> 4 1.000000</span>
<span class="sd"> 5 1.333333</span>
<span class="sd"> 6 0.000000</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each unbiased rolling variance is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(2).var()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 0.0 0.0</span>
<span class="sd"> 2 0.5 60.5</span>
<span class="sd"> 3 0.5 84.5</span>
<span class="sd"> 4 2.0 288.0</span>
<span class="sd"> 5 0.0 0.0</span>
<span class="sd"> 6 0.0 0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">var</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">skew</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased rolling skew.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 1, 5, 9])</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(3).skew()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 1.732051</span>
<span class="sd"> 3 0.000000</span>
<span class="sd"> 4 0.000000</span>
<span class="sd"> 5 -0.935220</span>
<span class="sd"> 6 -1.732051</span>
<span class="sd"> 7 0.000000</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling standard deviation is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(5).skew()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 NaN NaN</span>
<span class="sd"> 3 NaN NaN</span>
<span class="sd"> 4 1.257788 1.369456</span>
<span class="sd"> 5 -1.492685 -0.526039</span>
<span class="sd"> 6 -1.492685 -0.526039</span>
<span class="sd"> 7 -0.551618 0.686072</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">skew</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">kurt</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased rolling kurtosis.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 1, 5, 9])</span>
<span class="sd"> &gt;&gt;&gt; s.rolling(4).kurt()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> 3 -1.289256</span>
<span class="sd"> 4 -1.289256</span>
<span class="sd"> 5 2.234867</span>
<span class="sd"> 6 2.227147</span>
<span class="sd"> 7 1.500000</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each unbiased rolling variance is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.rolling(5).kurt()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 NaN NaN</span>
<span class="sd"> 3 NaN NaN</span>
<span class="sd"> 4 0.312500 0.906336</span>
<span class="sd"> 5 2.818047 1.016942</span>
<span class="sd"> 6 2.818047 1.016942</span>
<span class="sd"> 7 0.867769 0.389750</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">kurt</span><span class="p">()</span>
<span class="k">class</span> <span class="nc">RollingGroupby</span><span class="p">(</span><span class="n">RollingLike</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">groupby</span><span class="p">:</span> <span class="n">GroupBy</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">],</span>
<span class="n">window</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
<span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">window</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_groupby</span> <span class="o">=</span> <span class="n">groupby</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_window</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">ser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">ser</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span>
<span class="o">*</span><span class="p">[</span><span class="n">ser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">ser</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">]</span>
<span class="p">)</span>
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeRollingGroupby</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeRollingGroupby</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_apply_as_series_or_frame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Column</span><span class="p">],</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Wraps a function that handles Spark column in order</span>
<span class="sd"> to support it in both pandas-on-Spark Series and DataFrame.</span>
<span class="sd"> Note that the given `func` name should be same as the API&#39;s method name.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas</span> <span class="kn">import</span> <span class="n">DataFrame</span>
<span class="n">groupby</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_groupby</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_psdf</span>
<span class="c1"># Here we need to include grouped key as an index, and shift previous index.</span>
<span class="c1"># [index_column0, index_column1] -&gt; [grouped key, index_column0, index_column1]</span>
<span class="n">new_index_scols</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Column</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">new_index_spark_column_names</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">new_index_names</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">new_index_fields</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">groupkey</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">:</span>
<span class="n">index_column_name</span> <span class="o">=</span> <span class="n">SPARK_INDEX_NAME_FORMAT</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">new_index_scols</span><span class="p">))</span>
<span class="n">new_index_scols</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">groupkey</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">index_column_name</span><span class="p">))</span>
<span class="n">new_index_spark_column_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index_column_name</span><span class="p">)</span>
<span class="n">new_index_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">groupkey</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">new_index_fields</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">groupkey</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">index_column_name</span><span class="p">))</span>
<span class="k">for</span> <span class="n">new_index_scol</span><span class="p">,</span> <span class="n">index_name</span><span class="p">,</span> <span class="n">index_field</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">,</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_names</span><span class="p">,</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">,</span>
<span class="p">):</span>
<span class="n">index_column_name</span> <span class="o">=</span> <span class="n">SPARK_INDEX_NAME_FORMAT</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">new_index_scols</span><span class="p">))</span>
<span class="n">new_index_scols</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">new_index_scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">index_column_name</span><span class="p">))</span>
<span class="n">new_index_spark_column_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index_column_name</span><span class="p">)</span>
<span class="n">new_index_names</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index_name</span><span class="p">)</span>
<span class="n">new_index_fields</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">index_field</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">index_column_name</span><span class="p">))</span>
<span class="k">if</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_agg_columns_selected</span><span class="p">:</span>
<span class="n">agg_columns</span> <span class="o">=</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_agg_columns</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># pandas doesn&#39;t keep the groupkey as a column from 1.3 for DataFrameGroupBy</span>
<span class="n">column_labels_to_exclude</span> <span class="o">=</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_column_labels_to_exclude</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">groupby</span><span class="p">,</span> <span class="n">DataFrameGroupBy</span><span class="p">):</span>
<span class="k">for</span> <span class="n">groupkey</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">:</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="n">column_labels_to_exclude</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">groupkey</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="n">agg_columns</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">label</span><span class="p">)</span>
<span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span>
<span class="k">if</span> <span class="n">label</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">column_labels_to_exclude</span>
<span class="p">]</span>
<span class="n">applied</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">agg_column</span> <span class="ow">in</span> <span class="n">agg_columns</span><span class="p">:</span>
<span class="n">applied</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">agg_column</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">func</span><span class="p">(</span><span class="n">agg_column</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)))</span> <span class="c1"># TODO: dtype?</span>
<span class="c1"># Seems like pandas filters out when grouped key is NA.</span>
<span class="n">cond</span> <span class="o">=</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">()</span>
<span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">[</span><span class="mi">1</span><span class="p">:]:</span>
<span class="n">cond</span> <span class="o">=</span> <span class="n">cond</span> <span class="o">|</span> <span class="n">c</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">()</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">cond</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="n">new_index_scols</span> <span class="o">+</span> <span class="p">[</span><span class="n">c</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">applied</span><span class="p">]</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">new_index_spark_column_names</span><span class="p">],</span>
<span class="n">index_names</span><span class="o">=</span><span class="n">new_index_names</span><span class="p">,</span>
<span class="n">index_fields</span><span class="o">=</span><span class="n">new_index_fields</span><span class="p">,</span>
<span class="n">column_labels</span><span class="o">=</span><span class="p">[</span><span class="n">c</span><span class="o">.</span><span class="n">_column_label</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">applied</span><span class="p">],</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span>
<span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">c</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">applied</span>
<span class="p">],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="n">c</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">applied</span><span class="p">],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_handle_output</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The rolling count of any non-NaN observations inside the window.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.count : Count of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.count : Count of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).rolling(3).count().sort_index()</span>
<span class="sd"> 2 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 3 2 1.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 1.0</span>
<span class="sd"> 6 2.0</span>
<span class="sd"> 7 3.0</span>
<span class="sd"> 8 3.0</span>
<span class="sd"> 5 9 1.0</span>
<span class="sd"> 10 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling count is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).rolling(2).count().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 3 2 1.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 2.0</span>
<span class="sd"> 4 5 1.0</span>
<span class="sd"> 6 2.0</span>
<span class="sd"> 7 2.0</span>
<span class="sd"> 8 2.0</span>
<span class="sd"> 5 9 1.0</span>
<span class="sd"> 10 2.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">count</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">sum</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The rolling summation of any non-NaN observations inside the window.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.sum : Sum of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.sum : Sum of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).rolling(3).sum().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 12.0</span>
<span class="sd"> 8 12.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling summation is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).rolling(2).sum().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 8.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 18.0</span>
<span class="sd"> 4 18.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 32.0</span>
<span class="sd"> 7 32.0</span>
<span class="sd"> 8 32.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 50.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">min</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The rolling minimum of any non-NaN observations inside the window.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.min : Min of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.min : Min of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).rolling(3).min().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling minimum is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).rolling(2).min().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">min</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">max</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The rolling maximum of any non-NaN observations inside the window.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.max : Max of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.max : Max of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).rolling(3).max().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling maximum is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).rolling(2).max().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The rolling mean of any non-NaN observations inside the window.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.mean : Mean of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.mean : Mean of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).rolling(3).mean().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling mean is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).rolling(2).mean().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">quantile</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10000</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate rolling quantile.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> quantile : float</span>
<span class="sd"> Value between 0 and 1 providing the quantile to compute.</span>
<span class="sd"> accuracy : int, optional</span>
<span class="sd"> Default accuracy of approximation. Larger value means better accuracy.</span>
<span class="sd"> The relative error can be deduced by 1.0 / accuracy.</span>
<span class="sd"> This is a panda-on-Spark specific parameter.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the rolling</span>
<span class="sd"> calculation.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> `quantile` in pandas-on-Spark are using distributed percentile approximation</span>
<span class="sd"> algorithm unlike pandas, the result might be different with pandas, also `interpolation`</span>
<span class="sd"> parameter is not supported yet.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling rolling with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling rolling with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.quantile : Aggregating quantile for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.quantile : Aggregating quantile for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).rolling(3).quantile(0.5).sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each rolling quantile is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).rolling(2).quantile(0.5).sort_index()</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="n">quantile</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate rolling standard deviation.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">std</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">var</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased rolling variance.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">var</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">skew</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased rolling skew.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">skew</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">kurt</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased rolling kurtosis.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the rolling calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.rolling : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.rolling : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">kurt</span><span class="p">()</span>
<span class="k">class</span> <span class="nc">ExpandingLike</span><span class="p">(</span><span class="n">RollingAndExpanding</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">):</span>
<span class="k">if</span> <span class="n">min_periods</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;min_periods must be &gt;= 0&quot;</span><span class="p">)</span>
<span class="n">window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">window</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">count</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="k">class</span> <span class="nc">Expanding</span><span class="p">(</span><span class="n">ExpandingLike</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">psdf_or_psser</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">):</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.frame</span> <span class="kn">import</span> <span class="n">DataFrame</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">Series</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">min_periods</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psdf_or_psser</span><span class="p">,</span> <span class="p">(</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">Series</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;psdf_or_psser must be a series or dataframe; however, got: </span><span class="si">%s</span><span class="s2">&quot;</span>
<span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">psdf_or_psser</span><span class="p">)</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf_or_psser</span> <span class="o">=</span> <span class="n">psdf_or_psser</span>
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeExpanding</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeExpanding</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="c1"># TODO: when add &#39;axis&#39; parameter, should add to here too.</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="k">return</span> <span class="s2">&quot;Expanding [min_periods=</span><span class="si">{}</span><span class="s2">]&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">)</span>
<span class="n">_apply_as_series_or_frame</span> <span class="o">=</span> <span class="n">Rolling</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span>
<div class="viewcode-block" id="Expanding.count"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Expanding.count.html#pyspark.pandas.window.Expanding.count">[docs]</a> <span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The expanding count of any non-NaN observations inside the window.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.count : Count of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.count : Count of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 3, float(&quot;nan&quot;), 10])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding().count()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 3.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.to_frame().expanding().count()</span>
<span class="sd"> 0</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 3.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">count</span><span class="p">()</span></div>
<div class="viewcode-block" id="Expanding.sum"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Expanding.sum.html#pyspark.pandas.window.Expanding.sum">[docs]</a> <span class="k">def</span> <span class="nf">sum</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate expanding summation of given DataFrame or Series.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Same type as the input, with the same index, containing the</span>
<span class="sd"> expanding summation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.sum : Reducing sum for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.sum : Reducing sum for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4, 5])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> 4 5</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).sum()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 6.0</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> 4 15.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding summation is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> A B</span>
<span class="sd"> 0 1 1</span>
<span class="sd"> 1 2 4</span>
<span class="sd"> 2 3 9</span>
<span class="sd"> 3 4 16</span>
<span class="sd"> 4 5 25</span>
<span class="sd"> &gt;&gt;&gt; df.expanding(3).sum()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 6.0 14.0</span>
<span class="sd"> 3 10.0 30.0</span>
<span class="sd"> 4 15.0 55.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span></div>
<div class="viewcode-block" id="Expanding.min"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Expanding.min.html#pyspark.pandas.window.Expanding.min">[docs]</a> <span class="k">def</span> <span class="nf">min</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding minimum.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with a Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with a DataFrame.</span>
<span class="sd"> pyspark.pandas.Series.min : Similar method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.min : Similar method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Performing a expanding minimum with a window size of 3.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).min()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">min</span><span class="p">()</span></div>
<div class="viewcode-block" id="Expanding.max"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Expanding.max.html#pyspark.pandas.window.Expanding.max">[docs]</a> <span class="k">def</span> <span class="nf">max</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding maximum.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Return type is determined by the caller.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.max : Similar method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.max : Similar method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Performing a expanding minimum with a window size of 3.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 3, 5, 2, 6])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).max()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 5.0</span>
<span class="sd"> 3 5.0</span>
<span class="sd"> 4 6.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span></div>
<div class="viewcode-block" id="Expanding.mean"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Expanding.mean.html#pyspark.pandas.window.Expanding.mean">[docs]</a> <span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding mean of the values.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.mean : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.mean : Equivalent method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> The below examples will show expanding mean calculations with window sizes of</span>
<span class="sd"> two and three, respectively.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(2).mean()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 1.5</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 2.5</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).mean()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 2.5</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<div class="viewcode-block" id="Expanding.quantile"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.Expanding.quantile.html#pyspark.pandas.window.Expanding.quantile">[docs]</a> <span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">quantile</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10000</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding quantile of the values.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> quantile : float</span>
<span class="sd"> Value between 0 and 1 providing the quantile to compute.</span>
<span class="sd"> accuracy : int, optional</span>
<span class="sd"> Default accuracy of approximation. Larger value means better accuracy.</span>
<span class="sd"> The relative error can be deduced by 1.0 / accuracy.</span>
<span class="sd"> This is a panda-on-Spark specific parameter.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> `quantile` in pandas-on-Spark are using distributed percentile approximation</span>
<span class="sd"> algorithm unlike pandas, the result might be different with pandas (the result is</span>
<span class="sd"> similar to the interpolation set to `lower`), also `interpolation` parameter is</span>
<span class="sd"> not supported yet.</span>
<span class="sd"> the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling expanding with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling expanding with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.quantile : Aggregating quantile for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.quantile : Aggregating quantile for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> The below examples will show expanding quantile calculations with window sizes of</span>
<span class="sd"> two and three, respectively.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(2).quantile(0.5)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).quantile(0.5)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="n">quantile</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate expanding standard deviation.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).std()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 0.577350</span>
<span class="sd"> 3 0.957427</span>
<span class="sd"> 4 0.894427</span>
<span class="sd"> 5 0.836660</span>
<span class="sd"> 6 0.786796</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding standard deviation variance is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.expanding(2).std()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 0.000000 0.000000</span>
<span class="sd"> 2 0.577350 6.350853</span>
<span class="sd"> 3 0.957427 11.412712</span>
<span class="sd"> 4 0.894427 10.630146</span>
<span class="sd"> 5 0.836660 9.928075</span>
<span class="sd"> 6 0.786796 9.327379</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">std</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">var</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased expanding variance.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).var()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 0.333333</span>
<span class="sd"> 3 0.916667</span>
<span class="sd"> 4 0.800000</span>
<span class="sd"> 5 0.700000</span>
<span class="sd"> 6 0.619048</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each unbiased expanding variance is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.expanding(2).var()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 0.000000 0.000000</span>
<span class="sd"> 2 0.333333 40.333333</span>
<span class="sd"> 3 0.916667 130.250000</span>
<span class="sd"> 4 0.800000 113.000000</span>
<span class="sd"> 5 0.700000 98.566667</span>
<span class="sd"> 6 0.619048 87.000000</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">var</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">skew</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased expanding skew.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 1, 5, 9])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(3).skew()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 1.732051</span>
<span class="sd"> 3 0.854563</span>
<span class="sd"> 4 1.257788</span>
<span class="sd"> 5 -1.571593</span>
<span class="sd"> 6 -1.657542</span>
<span class="sd"> 7 -0.521760</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding standard deviation variance is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.expanding(5).skew()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 NaN NaN</span>
<span class="sd"> 3 NaN NaN</span>
<span class="sd"> 4 1.257788 1.369456</span>
<span class="sd"> 5 -1.571593 -0.423309</span>
<span class="sd"> 6 -1.657542 -0.355737</span>
<span class="sd"> 7 -0.521760 1.116874</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">skew</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">kurt</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased expanding kurtosis.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([5, 5, 6, 7, 5, 1, 5, 9])</span>
<span class="sd"> &gt;&gt;&gt; s.expanding(4).kurt()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> 3 -1.289256</span>
<span class="sd"> 4 0.312500</span>
<span class="sd"> 5 3.419520</span>
<span class="sd"> 6 4.028185</span>
<span class="sd"> 7 2.230373</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each unbiased expanding variance is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.expanding(5).kurt()</span>
<span class="sd"> A B</span>
<span class="sd"> 0 NaN NaN</span>
<span class="sd"> 1 NaN NaN</span>
<span class="sd"> 2 NaN NaN</span>
<span class="sd"> 3 NaN NaN</span>
<span class="sd"> 4 0.312500 0.906336</span>
<span class="sd"> 5 3.419520 1.486581</span>
<span class="sd"> 6 4.028185 1.936169</span>
<span class="sd"> 7 2.230373 2.273792</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">kurt</span><span class="p">()</span>
<span class="k">class</span> <span class="nc">ExpandingGroupby</span><span class="p">(</span><span class="n">ExpandingLike</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">groupby</span><span class="p">:</span> <span class="n">GroupBy</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">],</span> <span class="n">min_periods</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">):</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">min_periods</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_groupby</span> <span class="o">=</span> <span class="n">groupby</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_window</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">ser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">ser</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span>
<span class="o">*</span><span class="p">[</span><span class="n">ser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">ser</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">]</span>
<span class="p">)</span>
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeExpandingGroupby</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeExpandingGroupby</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="n">_apply_as_series_or_frame</span> <span class="o">=</span> <span class="n">RollingGroupby</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span>
<span class="k">def</span> <span class="nf">count</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> The expanding count of any non-NaN observations inside the window.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.count : Count of the full Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.count : Count of the full DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).expanding(3).count().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 3.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding count is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).expanding(2).count().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 2.0</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 2.0</span>
<span class="sd"> 7 3.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 2.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">count</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">sum</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate expanding summation of given DataFrame or Series.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Same type as the input, with the same index, containing the</span>
<span class="sd"> expanding summation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.sum : Reducing sum for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.sum : Reducing sum for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).expanding(3).sum().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 12.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding summation is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).expanding(2).sum().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 8.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 18.0</span>
<span class="sd"> 4 27.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 32.0</span>
<span class="sd"> 7 48.0</span>
<span class="sd"> 8 64.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 50.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">min</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding minimum.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with a Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with a DataFrame.</span>
<span class="sd"> pyspark.pandas.Series.min : Similar method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.min : Similar method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).expanding(3).min().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding minimum is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).expanding(2).min().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">min</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">max</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding maximum.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Return type is determined by the caller.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.max : Similar method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.max : Similar method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).expanding(3).max().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding maximum is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).expanding(2).max().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding mean of the values.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.mean : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.mean : Equivalent method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).expanding(3).mean().sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding mean is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).expanding(2).mean().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">quantile</span><span class="p">:</span> <span class="nb">float</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10000</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate the expanding quantile of the values.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> quantile : float</span>
<span class="sd"> Value between 0 and 1 providing the quantile to compute.</span>
<span class="sd"> accuracy : int, optional</span>
<span class="sd"> Default accuracy of approximation. Larger value means better accuracy.</span>
<span class="sd"> The relative error can be deduced by 1.0 / accuracy.</span>
<span class="sd"> This is a panda-on-Spark specific parameter.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the expanding</span>
<span class="sd"> calculation.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> `quantile` in pandas-on-Spark are using distributed percentile approximation</span>
<span class="sd"> algorithm unlike pandas, the result might be different with pandas, also `interpolation`</span>
<span class="sd"> parameter is not supported yet.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling expanding with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling expanding with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.quantile : Aggregating quantile for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.quantile : Aggregating quantile for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).expanding(3).quantile(0.5).sort_index()</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each expanding quantile is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).expanding(2).quantile(0.5).sort_index()</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 NaN</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 NaN</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 NaN</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 NaN</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="n">quantile</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">std</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate expanding standard deviation.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding: Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">std</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">var</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased expanding variance.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">var</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">skew</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate expanding standard skew.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding: Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.std : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.std : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.std : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">skew</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">kurt</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate unbiased expanding kurtosis.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returns the same object type as the caller of the expanding calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.var : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.var : Equivalent method for DataFrame.</span>
<span class="sd"> numpy.var : Equivalent method for Numpy array.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">kurt</span><span class="p">()</span>
<span class="k">class</span> <span class="nc">ExponentialMovingLike</span><span class="p">(</span><span class="n">Generic</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">],</span> <span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">window</span><span class="p">:</span> <span class="n">WindowSpec</span><span class="p">,</span>
<span class="n">com</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">span</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">halflife</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">alpha</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignore_na</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">):</span>
<span class="k">if</span> <span class="p">(</span><span class="n">min_periods</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="n">min_periods</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;min_periods must be &gt;= 0&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">min_periods</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">min_periods</span> <span class="o">=</span> <span class="mi">0</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span> <span class="o">=</span> <span class="n">min_periods</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_ignore_na</span> <span class="o">=</span> <span class="n">ignore_na</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_window</span> <span class="o">=</span> <span class="n">window</span>
<span class="c1"># This unbounded Window is later used to handle &#39;min_periods&#39; for now.</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="k">if</span> <span class="p">(</span><span class="n">com</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="ow">not</span> <span class="n">com</span> <span class="o">&gt;=</span> <span class="mi">0</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;com must be &gt;= 0&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_com</span> <span class="o">=</span> <span class="n">com</span>
<span class="k">if</span> <span class="p">(</span><span class="n">span</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="ow">not</span> <span class="n">span</span> <span class="o">&gt;=</span> <span class="mi">1</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;span must be &gt;= 1&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_span</span> <span class="o">=</span> <span class="n">span</span>
<span class="k">if</span> <span class="p">(</span><span class="n">halflife</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="ow">not</span> <span class="n">halflife</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;halflife must be &gt; 0&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_halflife</span> <span class="o">=</span> <span class="n">halflife</span>
<span class="k">if</span> <span class="p">(</span><span class="n">alpha</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="ow">not</span> <span class="mi">0</span> <span class="o">&lt;</span> <span class="n">alpha</span> <span class="o">&lt;=</span> <span class="mi">1</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;alpha must be in (0, 1]&quot;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_alpha</span> <span class="o">=</span> <span class="n">alpha</span>
<span class="k">def</span> <span class="nf">_compute_unified_alpha</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="n">unified_alpha</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="n">opt_count</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_com</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">unified_alpha</span> <span class="o">=</span> <span class="mf">1.0</span> <span class="o">/</span> <span class="p">(</span><span class="mi">1</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">_com</span><span class="p">)</span>
<span class="n">opt_count</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_span</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">unified_alpha</span> <span class="o">=</span> <span class="mf">2.0</span> <span class="o">/</span> <span class="p">(</span><span class="mi">1</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">_span</span><span class="p">)</span>
<span class="n">opt_count</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_halflife</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">unified_alpha</span> <span class="o">=</span> <span class="mf">1.0</span> <span class="o">-</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span><span class="o">-</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">_halflife</span><span class="p">)</span>
<span class="n">opt_count</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_alpha</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">unified_alpha</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_alpha</span>
<span class="n">opt_count</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="n">opt_count</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Must pass one of com, span, halflife, or alpha&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">opt_count</span> <span class="o">!=</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;com, span, halflife, and alpha are mutually exclusive&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">unified_alpha</span>
<span class="nd">@abstractmethod</span>
<span class="k">def</span> <span class="nf">_apply_as_series_or_frame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Column</span><span class="p">],</span> <span class="n">Column</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Wraps a function that handles Spark column in order</span>
<span class="sd"> to support it in both pandas-on-Spark Series and DataFrame.</span>
<span class="sd"> Note that the given `func` name should be same as the API&#39;s method name.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">pass</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="n">unified_alpha</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_compute_unified_alpha</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="n">col_ewm</span> <span class="o">=</span> <span class="n">SF</span><span class="o">.</span><span class="n">ewm</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">unified_alpha</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_ignore_na</span><span class="p">)</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="o">~</span><span class="n">scol</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="p">)</span>
<span class="o">&gt;=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="n">col_ewm</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span><span class="p">(</span><span class="n">mean</span><span class="p">)</span>
<span class="k">class</span> <span class="nc">ExponentialMoving</span><span class="p">(</span><span class="n">ExponentialMovingLike</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">psdf_or_psser</span><span class="p">:</span> <span class="n">FrameLike</span><span class="p">,</span>
<span class="n">com</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">span</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">halflife</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">alpha</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignore_na</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">):</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.frame</span> <span class="kn">import</span> <span class="n">DataFrame</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.series</span> <span class="kn">import</span> <span class="n">Series</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psdf_or_psser</span><span class="p">,</span> <span class="p">(</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">Series</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;psdf_or_psser must be a series or dataframe; however, got: </span><span class="si">%s</span><span class="s2">&quot;</span>
<span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">psdf_or_psser</span><span class="p">)</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf_or_psser</span> <span class="o">=</span> <span class="n">psdf_or_psser</span>
<span class="n">window_spec</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">window_spec</span><span class="p">,</span> <span class="n">com</span><span class="p">,</span> <span class="n">span</span><span class="p">,</span> <span class="n">halflife</span><span class="p">,</span> <span class="n">alpha</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">,</span> <span class="n">ignore_na</span><span class="p">)</span>
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeExponentialMoving</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeExponentialMoving</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="n">_apply_as_series_or_frame</span> <span class="o">=</span> <span class="n">Rolling</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span>
<div class="viewcode-block" id="ExponentialMoving.mean"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.window.ExponentialMoving.mean.html#pyspark.pandas.window.ExponentialMoving.mean">[docs]</a> <span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate an online exponentially weighted mean.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> There are behavior differences between pandas-on-Spark and pandas.</span>
<span class="sd"> * the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the exponentially</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.mean : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.mean : Equivalent method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> The below examples will show computing exponentially weighted moving average.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;s1&#39;: [.2, .0, .6, .2, .4, .5, .6], &#39;s2&#39;: [2, 1, 3, 1, 0, 0, 0]})</span>
<span class="sd"> &gt;&gt;&gt; df.ewm(com=0.1).mean()</span>
<span class="sd"> s1 s2</span>
<span class="sd"> 0 0.200000 2.000000</span>
<span class="sd"> 1 0.016667 1.083333</span>
<span class="sd"> 2 0.547368 2.827068</span>
<span class="sd"> 3 0.231557 1.165984</span>
<span class="sd"> 4 0.384688 0.105992</span>
<span class="sd"> 5 0.489517 0.009636</span>
<span class="sd"> 6 0.589956 0.000876</span>
<span class="sd"> &gt;&gt;&gt; df.s2.ewm(halflife=1.5, min_periods=3).mean()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 2.182572</span>
<span class="sd"> 3 1.663174</span>
<span class="sd"> 4 0.979949</span>
<span class="sd"> 5 0.593155</span>
<span class="sd"> 6 0.364668</span>
<span class="sd"> Name: s2, dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span></div>
<span class="c1"># TODO: when add &#39;adjust&#39; parameter, should add to here too.</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="k">return</span> <span class="p">(</span>
<span class="s2">&quot;ExponentialMoving [com=</span><span class="si">{}</span><span class="s2">, span=</span><span class="si">{}</span><span class="s2">, halflife=</span><span class="si">{}</span><span class="s2">, alpha=</span><span class="si">{}</span><span class="s2">, &quot;</span>
<span class="s2">&quot;min_periods=</span><span class="si">{}</span><span class="s2">, ignore_na=</span><span class="si">{}</span><span class="s2">]&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_com</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_span</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_halflife</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_alpha</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_ignore_na</span><span class="p">,</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">class</span> <span class="nc">ExponentialMovingGroupby</span><span class="p">(</span><span class="n">ExponentialMovingLike</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">]):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">groupby</span><span class="p">:</span> <span class="n">GroupBy</span><span class="p">[</span><span class="n">FrameLike</span><span class="p">],</span>
<span class="n">com</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">span</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">halflife</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">alpha</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignore_na</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">):</span>
<span class="n">window_spec</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="n">window_spec</span><span class="p">,</span> <span class="n">com</span><span class="p">,</span> <span class="n">span</span><span class="p">,</span> <span class="n">halflife</span><span class="p">,</span> <span class="n">alpha</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">,</span> <span class="n">ignore_na</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_groupby</span> <span class="o">=</span> <span class="n">groupby</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_window</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">ser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">ser</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">])</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_unbounded_window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span>
<span class="o">*</span><span class="p">[</span><span class="n">ser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="k">for</span> <span class="n">ser</span> <span class="ow">in</span> <span class="n">groupby</span><span class="o">.</span><span class="n">_groupkeys</span><span class="p">]</span>
<span class="p">)</span>
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeExponentialMovingGroupby</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeExponentialMovingGroupby</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="n">_apply_as_series_or_frame</span> <span class="o">=</span> <span class="n">RollingGroupby</span><span class="o">.</span><span class="n">_apply_as_series_or_frame</span>
<span class="k">def</span> <span class="nf">mean</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">FrameLike</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Calculate an online exponentially weighted mean.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> There are behavior differences between pandas-on-Spark and pandas.</span>
<span class="sd"> * the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to move all data into</span>
<span class="sd"> single partition in single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method against very large dataset.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> Returned object type is determined by the caller of the exponentially</span>
<span class="sd"> calculation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> pyspark.pandas.Series.expanding : Calling object with Series data.</span>
<span class="sd"> pyspark.pandas.DataFrame.expanding : Calling object with DataFrames.</span>
<span class="sd"> pyspark.pandas.Series.mean : Equivalent method for Series.</span>
<span class="sd"> pyspark.pandas.DataFrame.mean : Equivalent method for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.groupby(s).ewm(alpha=0.5).mean().sort_index()</span>
<span class="sd"> 2 0 2.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 3 2 3.0</span>
<span class="sd"> 3 3.0</span>
<span class="sd"> 4 3.0</span>
<span class="sd"> 4 5 4.0</span>
<span class="sd"> 6 4.0</span>
<span class="sd"> 7 4.0</span>
<span class="sd"> 8 4.0</span>
<span class="sd"> 5 9 5.0</span>
<span class="sd"> 10 5.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> For DataFrame, each ewm mean is computed column-wise.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;A&quot;: s.to_numpy(), &quot;B&quot;: s.to_numpy() ** 2})</span>
<span class="sd"> &gt;&gt;&gt; df.groupby(df.A).ewm(alpha=0.5).mean().sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> B</span>
<span class="sd"> A</span>
<span class="sd"> 2 0 4.0</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> 3 2 9.0</span>
<span class="sd"> 3 9.0</span>
<span class="sd"> 4 9.0</span>
<span class="sd"> 4 5 16.0</span>
<span class="sd"> 6 16.0</span>
<span class="sd"> 7 16.0</span>
<span class="sd"> 8 16.0</span>
<span class="sd"> 5 9 25.0</span>
<span class="sd"> 10 25.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="c1"># TODO: when add &#39;adjust&#39; parameter, should add to here too.</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="k">return</span> <span class="p">(</span>
<span class="s2">&quot;ExponentialMovingGroupby [com=</span><span class="si">{}</span><span class="s2">, span=</span><span class="si">{}</span><span class="s2">, halflife=</span><span class="si">{}</span><span class="s2">, alpha=</span><span class="si">{}</span><span class="s2">, &quot;</span>
<span class="s2">&quot;min_periods=</span><span class="si">{}</span><span class="s2">, ignore_na=</span><span class="si">{}</span><span class="s2">]&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_com</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_span</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_halflife</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_alpha</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_min_periods</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_ignore_na</span><span class="p">,</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">def</span> <span class="nf">_test</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">doctest</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span>
<span class="kn">import</span> <span class="nn">pyspark.pandas.window</span>
<span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s2">&quot;SPARK_HOME&quot;</span><span class="p">])</span>
<span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span><span class="o">.</span><span class="n">window</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">globs</span><span class="p">[</span><span class="s2">&quot;ps&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span>
<span class="n">spark</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">&quot;local[4]&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">&quot;pyspark.pandas.window tests&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span>
<span class="p">)</span>
<span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span>
<span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span><span class="o">.</span><span class="n">window</span><span class="p">,</span>
<span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span>
<span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span> <span class="o">|</span> <span class="n">doctest</span><span class="o">.</span><span class="n">NORMALIZE_WHITESPACE</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span>
<span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span>
<span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="n">_test</span><span class="p">()</span>
</pre></div>
</article>
<footer class="bd-footer-article">
<div class="footer-article-items footer-article__inner">
<div class="footer-article-item"><!-- Previous / next buttons -->
<div class="prev-next-area">
</div></div>
</div>
</footer>
</div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item"><p class="copyright">
Copyright @ 2024 The Apache Software Foundation, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
</p></div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.5.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item"><p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.13.3.
</p></div>
</div>
</div>
</footer>
</body>
</html>