<!-- blob: 5b4daca577d75d086cce8dba58edbf1e3fbde36e [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>pyspark.pandas.series &#8212; PySpark 4.0.0-preview1 documentation</title>
<script data-cfasync="false">
// Copy the reader's saved "mode" and "theme" from localStorage onto the
// <html> element as data-mode / data-theme. This runs ahead of the stylesheet
// links that follow, presumably so the theme CSS sees the right values on first paint.
// Defaults: mode falls back to "", theme falls back to "light".
(function () {
  var rootData = document.documentElement.dataset;
  rootData.mode = localStorage.getItem("mode") || "";
  rootData.theme = localStorage.getItem("theme") || "light";
})();
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../../../_static/styles/theme.css?digest=e353d410970836974a52" rel="stylesheet" />
<link href="../../../_static/styles/bootstrap.css?digest=e353d410970836974a52" rel="stylesheet" />
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=e353d410970836974a52" rel="stylesheet" />
<link href="../../../_static/vendor/fontawesome/6.1.2/css/all.min.css?digest=e353d410970836974a52" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../../../_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css" />
<link rel="stylesheet" type="text/css" href="../../../_static/css/pyspark.css" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52" />
<script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
<script src="../../../_static/jquery.js"></script>
<script src="../../../_static/underscore.js"></script>
<script src="../../../_static/doctools.js"></script>
<script src="../../../_static/clipboard.min.js"></script>
<script src="../../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
<script>DOCUMENTATION_OPTIONS.pagename = '_modules/pyspark/pandas/series';</script>
<link rel="canonical" href="https://spark.apache.org/docs/latest/api/python/_modules/pyspark/pandas/series.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="docsearch:language" content="en">
<!-- Matomo -->
<script type="text/javascript">
// Matomo analytics bootstrap. Commands are queued on the global `_paq` array
// (an existing window._paq is reused if one is already defined); the tracker
// script injected below is expected to consume that queue once it has loaded.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
// Queue the cookie opt-out first, then the page view and link tracking.
_paq.push(["disableCookies"]);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
// Base URL of the analytics server; both the endpoint and the script hang off it.
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '40']);
// Create the tracker <script> and insert it before the first <script> element
// in the document, marked async so it does not block parsing.
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a class="skip-link" href="#main-content">Skip to main content</a>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="../../../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<nav class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../../../index.html">
<img src="../../../_static/spark-logo-light.png" class="logo__image only-light" alt="Logo image"/>
<script>document.write(`<img src="../../../_static/spark-logo-dark.png" class="logo__image only-dark" alt="Logo image"/>`);</script>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item"><nav class="navbar-nav">
<p class="sidebar-header-items__title"
role="heading"
aria-level="1"
aria-label="Site Navigation">
Site Navigation
</p>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../index.html">
Overview
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../getting_started/index.html">
Getting Started
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../user_guide/index.html">
User Guides
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../reference/index.html">
API Reference
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../development/index.html">
Development
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../migration_guide/index.html">
Migration Guides
</a>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
document.write(`
<button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
</button>
`);
</script>
</div>
<div class="navbar-item"><!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<div id="version-button" class="dropdown">
<button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown">
4.0.0-preview1
<span class="caret"></span>
</button>
<div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
<script type="text/javascript">
// Build the docs-homepage URL for one version entry.
//   entry:   object whose `version` string replaces the "{version}" placeholder.
//   returns: the homepage URL of that docs version.
function buildURL(entry) {
  // URL pattern supplied by jinja when the page is rendered.
  const homepagePattern = "https://spark.apache.org/docs/{version}/api/python/index.html";
  return homepagePattern.replace("{version}", entry.version);
}
// Click handler for a version-switcher link: go to this same page in the
// selected docs version when it exists there, otherwise go to that version's
// homepage.
//   event:   the click event; the clicked link's href is the other version's homepage.
//   returns: false, so the browser does not follow the link on its own while
//            the existence check is in flight.
function checkPageExistsAndRedirect(event) {
  const currentFilePath = "_modules/pyspark/pandas/series.html",
        otherDocsHomepage = event.target.getAttribute("href");
  // The homepage href ends in ".../index.html", so plain string concatenation
  // produced ".../index.html_modules/..." and the probe could never succeed.
  // Resolving the page path relative to the homepage URL replaces the trailing
  // "index.html" segment (and still works for a directory-style ".../" href).
  const homepageUrl = new URL(otherDocsHomepage, document.baseURI);
  const tryUrl = new URL(currentFilePath, homepageUrl).href;
  $.ajax({
    type: 'HEAD',
    url: tryUrl,
    // if the page exists, go there
    success: function() {
      location.href = tryUrl;
    }
  }).fail(function() {
    // page is missing in that version (or the probe failed): use its homepage
    location.href = otherDocsHomepage;
  });
  return false;
}
// Populate this version switcher's dropdown from the published versions.json.
(function () {
  // This page contains more than one element with id "version_switcher", and a
  // global "#version_switcher" lookup only ever returns the first of them.
  // Resolve the menu that sits next to the running <script> instead.
  // document.currentScript is only set while the script executes
  // synchronously, so it is captured here rather than in the async callback.
  const ownScript = document.currentScript;
  const ownMenu = ownScript && ownScript.parentNode
    ? ownScript.parentNode.querySelector("#version_switcher")
    : null;
  // Fall back to the global lookup if the local menu cannot be found.
  const $menu = ownMenu ? $(ownMenu) : $("#version_switcher");

  // get JSON config
  $.getJSON("https://spark.apache.org/static/versions.json", function (data) {
    // create the nodes first (before AJAX calls) to ensure the order is
    // correct (for now, links will go to doc version homepage)
    $.each(data, function (index, entry) {
      // if no custom name specified (e.g., "latest"), use version string
      if (!("name" in entry)) {
        entry.name = entry.version;
      }
      // construct the appropriate URL, and add it to the dropdown
      entry.url = buildURL(entry);
      const node = document.createElement("a");
      node.setAttribute("class", "list-group-item list-group-item-action py-1");
      node.setAttribute("href", `${entry.url}`);
      node.textContent = `${entry.name}`;
      // on click, try the matching page in that version before the homepage
      node.onclick = checkPageExistsAndRedirect;
      $menu.append(node);
    });
  });
})();
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span>
<span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span>
<span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<!-- The screen-reader text is a <span>: a <label> is interactive content, which
     is not allowed inside <a>, and it labelled no form control. The icons are
     decorative, so they are hidden from assistive technology. -->
<li class="nav-item">
<a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box" aria-hidden="true"></i></span>
<span class="sr-only">PyPI</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
document.write(`
<button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
</button>
`);
</script>
</div>
</div>
</nav>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar hide-on-wide">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item"><nav class="navbar-nav">
<p class="sidebar-header-items__title"
role="heading"
aria-level="1"
aria-label="Site Navigation">
Site Navigation
</p>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../index.html">
Overview
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../getting_started/index.html">
Getting Started
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../user_guide/index.html">
User Guides
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../reference/index.html">
API Reference
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../development/index.html">
Development
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../../../migration_guide/index.html">
Migration Guides
</a>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item"><!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<div id="version-button" class="dropdown">
<button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown">
4.0.0-preview1
<span class="caret"></span>
</button>
<div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div>
<script type="text/javascript">
// Build the docs-homepage URL for one version entry.
//   entry:   object whose `version` string replaces the "{version}" placeholder.
//   returns: the homepage URL of that docs version.
function buildURL(entry) {
  // URL pattern supplied by jinja when the page is rendered.
  const homepagePattern = "https://spark.apache.org/docs/{version}/api/python/index.html";
  return homepagePattern.replace("{version}", entry.version);
}
// Click handler for a version-switcher link: go to this same page in the
// selected docs version when it exists there, otherwise go to that version's
// homepage.
//   event:   the click event; the clicked link's href is the other version's homepage.
//   returns: false, so the browser does not follow the link on its own while
//            the existence check is in flight.
function checkPageExistsAndRedirect(event) {
  const currentFilePath = "_modules/pyspark/pandas/series.html",
        otherDocsHomepage = event.target.getAttribute("href");
  // The homepage href ends in ".../index.html", so plain string concatenation
  // produced ".../index.html_modules/..." and the probe could never succeed.
  // Resolving the page path relative to the homepage URL replaces the trailing
  // "index.html" segment (and still works for a directory-style ".../" href).
  const homepageUrl = new URL(otherDocsHomepage, document.baseURI);
  const tryUrl = new URL(currentFilePath, homepageUrl).href;
  $.ajax({
    type: 'HEAD',
    url: tryUrl,
    // if the page exists, go there
    success: function() {
      location.href = tryUrl;
    }
  }).fail(function() {
    // page is missing in that version (or the probe failed): use its homepage
    location.href = otherDocsHomepage;
  });
  return false;
}
// Populate this version switcher's dropdown from the published versions.json.
(function () {
  // This page contains more than one element with id "version_switcher", and a
  // global "#version_switcher" lookup only ever returns the first of them, so
  // this copy's menu would otherwise stay empty. Resolve the menu that sits
  // next to the running <script> instead. document.currentScript is only set
  // while the script executes synchronously, so it is captured here rather
  // than in the async callback.
  const ownScript = document.currentScript;
  const ownMenu = ownScript && ownScript.parentNode
    ? ownScript.parentNode.querySelector("#version_switcher")
    : null;
  // Fall back to the global lookup if the local menu cannot be found.
  const $menu = ownMenu ? $(ownMenu) : $("#version_switcher");

  // get JSON config
  $.getJSON("https://spark.apache.org/static/versions.json", function (data) {
    // create the nodes first (before AJAX calls) to ensure the order is
    // correct (for now, links will go to doc version homepage)
    $.each(data, function (index, entry) {
      // if no custom name specified (e.g., "latest"), use version string
      if (!("name" in entry)) {
        entry.name = entry.version;
      }
      // construct the appropriate URL, and add it to the dropdown
      entry.url = buildURL(entry);
      const node = document.createElement("a");
      node.setAttribute("class", "list-group-item list-group-item-action py-1");
      node.setAttribute("href", `${entry.url}`);
      node.textContent = `${entry.name}`;
      // on click, try the matching page in that version before the homepage
      node.onclick = checkPageExistsAndRedirect;
      $menu.append(node);
    });
  });
})();
</script></div>
<div class="navbar-item">
<script>
document.write(`
<button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span>
<span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span>
<span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<!-- The screen-reader text is a <span>: a <label> is interactive content, which
     is not allowed inside <a>, and it labelled no form control. The icons are
     decorative, so they are hidden from assistive technology. -->
<li class="nav-item">
<a href="https://github.com/apache/spark" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-github" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://pypi.org/project/pyspark" title="PyPI" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-solid fa-box" aria-hidden="true"></i></span>
<span class="sr-only">PyPI</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<!-- Breadcrumb trail. The enclosing <nav> is the navigation landmark; the <ul>
     keeps its native list role so assistive technology announces the item
     count instead of a second, nested navigation region. -->
<nav aria-label="Breadcrumbs">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../../../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="../../index.html" class="nav-link">Module code</a></li>
<li class="breadcrumb-item active" aria-current="page">pyspark.pandas.series</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article" role="main">
<h1>Source code for pyspark.pandas.series</h1><div class="highlight"><pre>
<span></span><span class="c1">#</span>
<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
<span class="c1"># this work for additional information regarding copyright ownership.</span>
<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
<span class="c1"># (the &quot;License&quot;); you may not use this file except in compliance with</span>
<span class="c1"># the License. You may obtain a copy of the License at</span>
<span class="c1">#</span>
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="c1">#</span>
<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
<span class="c1"># distributed under the License is distributed on an &quot;AS IS&quot; BASIS,</span>
<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
<span class="c1"># See the License for the specific language governing permissions and</span>
<span class="c1"># limitations under the License.</span>
<span class="c1">#</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd">A wrapper class for Spark Column to behave like pandas Series.</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="kn">import</span> <span class="nn">datetime</span>
<span class="kn">import</span> <span class="nn">re</span>
<span class="kn">import</span> <span class="nn">inspect</span>
<span class="kn">import</span> <span class="nn">warnings</span>
<span class="kn">from</span> <span class="nn">collections.abc</span> <span class="kn">import</span> <span class="n">Mapping</span>
<span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">partial</span><span class="p">,</span> <span class="n">reduce</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">Any</span><span class="p">,</span>
<span class="n">Callable</span><span class="p">,</span>
<span class="n">Dict</span><span class="p">,</span>
<span class="n">Generic</span><span class="p">,</span>
<span class="n">IO</span><span class="p">,</span>
<span class="n">Iterable</span><span class="p">,</span>
<span class="n">List</span><span class="p">,</span>
<span class="n">Optional</span><span class="p">,</span>
<span class="n">Sequence</span><span class="p">,</span>
<span class="n">Tuple</span><span class="p">,</span>
<span class="n">Type</span><span class="p">,</span>
<span class="n">Union</span><span class="p">,</span>
<span class="n">cast</span><span class="p">,</span>
<span class="n">no_type_check</span><span class="p">,</span>
<span class="n">overload</span><span class="p">,</span>
<span class="n">TYPE_CHECKING</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">from</span> <span class="nn">pandas.core.accessor</span> <span class="kn">import</span> <span class="n">CachedAccessor</span>
<span class="kn">from</span> <span class="nn">pandas.io.formats.printing</span> <span class="kn">import</span> <span class="n">pprint_thing</span>
<span class="kn">from</span> <span class="nn">pandas.api.types</span> <span class="kn">import</span> <span class="p">(</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="n">is_list_like</span><span class="p">,</span>
<span class="n">is_hashable</span><span class="p">,</span>
<span class="n">CategoricalDtype</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pandas.tseries.frequencies</span> <span class="kn">import</span> <span class="n">DateOffset</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">F</span><span class="p">,</span> <span class="n">Column</span> <span class="k">as</span> <span class="n">PySparkColumn</span><span class="p">,</span> <span class="n">DataFrame</span> <span class="k">as</span> <span class="n">SparkDataFrame</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">ArrayType</span><span class="p">,</span>
<span class="n">BooleanType</span><span class="p">,</span>
<span class="n">DecimalType</span><span class="p">,</span>
<span class="n">DoubleType</span><span class="p">,</span>
<span class="n">FloatType</span><span class="p">,</span>
<span class="n">IntegerType</span><span class="p">,</span>
<span class="n">LongType</span><span class="p">,</span>
<span class="n">NumericType</span><span class="p">,</span>
<span class="n">Row</span><span class="p">,</span>
<span class="n">StructType</span><span class="p">,</span>
<span class="n">TimestampType</span><span class="p">,</span>
<span class="n">NullType</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.window</span> <span class="kn">import</span> <span class="n">Window</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.utils</span> <span class="kn">import</span> <span class="n">get_column_class</span><span class="p">,</span> <span class="n">get_window_class</span>
<span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">pandas</span> <span class="k">as</span> <span class="n">ps</span> <span class="c1"># For running doctests and reference resolution in PyCharm.</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas._typing</span> <span class="kn">import</span> <span class="n">Axis</span><span class="p">,</span> <span class="n">Dtype</span><span class="p">,</span> <span class="n">Label</span><span class="p">,</span> <span class="n">Name</span><span class="p">,</span> <span class="n">Scalar</span><span class="p">,</span> <span class="n">T</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.accessors</span> <span class="kn">import</span> <span class="n">PandasOnSparkSeriesMethods</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.categorical</span> <span class="kn">import</span> <span class="n">CategoricalAccessor</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.config</span> <span class="kn">import</span> <span class="n">get_option</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.correlation</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">compute</span><span class="p">,</span>
<span class="n">CORRELATION_VALUE_1_COLUMN</span><span class="p">,</span>
<span class="n">CORRELATION_VALUE_2_COLUMN</span><span class="p">,</span>
<span class="n">CORRELATION_CORR_OUTPUT_COLUMN</span><span class="p">,</span>
<span class="n">CORRELATION_COUNT_OUTPUT_COLUMN</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.base</span> <span class="kn">import</span> <span class="n">IndexOpsMixin</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.exceptions</span> <span class="kn">import</span> <span class="n">SparkPandasIndexingError</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.frame</span> <span class="kn">import</span> <span class="n">DataFrame</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.generic</span> <span class="kn">import</span> <span class="n">Frame</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.internal</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">InternalField</span><span class="p">,</span>
<span class="n">InternalFrame</span><span class="p">,</span>
<span class="n">DEFAULT_SERIES_NAME</span><span class="p">,</span>
<span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">,</span>
<span class="n">SPARK_DEFAULT_INDEX_NAME</span><span class="p">,</span>
<span class="n">SPARK_DEFAULT_SERIES_NAME</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.missing.series</span> <span class="kn">import</span> <span class="n">MissingPandasLikeSeries</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.plot</span> <span class="kn">import</span> <span class="n">PandasOnSparkPlotAccessor</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.utils</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">combine_frames</span><span class="p">,</span>
<span class="n">is_name_like_tuple</span><span class="p">,</span>
<span class="n">is_name_like_value</span><span class="p">,</span>
<span class="n">name_like_string</span><span class="p">,</span>
<span class="n">same_anchor</span><span class="p">,</span>
<span class="n">scol_for</span><span class="p">,</span>
<span class="n">sql_conf</span><span class="p">,</span>
<span class="n">validate_arguments_and_invoke_function</span><span class="p">,</span>
<span class="n">validate_axis</span><span class="p">,</span>
<span class="n">validate_bool_kwarg</span><span class="p">,</span>
<span class="n">verify_temp_column_name</span><span class="p">,</span>
<span class="n">SPARK_CONF_ARROW_ENABLED</span><span class="p">,</span>
<span class="n">log_advice</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.datetimes</span> <span class="kn">import</span> <span class="n">DatetimeMethods</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.spark</span> <span class="kn">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">SF</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.spark.accessors</span> <span class="kn">import</span> <span class="n">SparkSeriesMethods</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.strings</span> <span class="kn">import</span> <span class="n">StringMethods</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.typedef</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">infer_return_type</span><span class="p">,</span>
<span class="n">spark_type_to_pandas_dtype</span><span class="p">,</span>
<span class="n">ScalarType</span><span class="p">,</span>
<span class="n">SeriesType</span><span class="p">,</span>
<span class="n">create_type_for_series_type</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.typedef.typehints</span> <span class="kn">import</span> <span class="n">as_spark_type</span>
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.sql._typing</span> <span class="kn">import</span> <span class="n">ColumnOrName</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.groupby</span> <span class="kn">import</span> <span class="n">SeriesGroupBy</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.resample</span> <span class="kn">import</span> <span class="n">SeriesResampler</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.indexes</span> <span class="kn">import</span> <span class="n">Index</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.spark.accessors</span> <span class="kn">import</span> <span class="n">SparkIndexOpsMethods</span>
<span class="c1"># This regular expression pattern is compiled and defined here to avoid to compile the same</span>
<span class="c1"># pattern every time it is used in _repr_ in Series.</span>
<span class="c1"># This pattern basically seeks the footer string from pandas&#39;</span>
<span class="n">REPR_PATTERN</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">&quot;Length: (?P&lt;length&gt;[0-9]+)&quot;</span><span class="p">)</span>
<span class="n">_flex_doc_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Return </span><span class="si">{desc}</span><span class="s2"> of series and other, element-wise (binary operator `</span><span class="si">{op_name}</span><span class="s2">`).</span>
<span class="s2">Equivalent to ``</span><span class="si">{equiv}</span><span class="s2">``</span>
<span class="s2">Parameters</span>
<span class="s2">----------</span>
<span class="s2">other : Series or scalar value</span>
<span class="s2">fill_value : Scalar value, default None</span>
<span class="s2">Returns</span>
<span class="s2">-------</span>
<span class="s2">Series</span>
<span class="s2"> The result of the operation.</span>
<span class="s2">See Also</span>
<span class="s2">--------</span>
<span class="s2">Series.</span><span class="si">{reverse}</span>
<span class="si">{series_examples}</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_add_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.add(df.b)</span>
<span class="s2">a 4.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 6.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.radd(df.b)</span>
<span class="s2">a 4.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 6.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_sub_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.subtract(df.b)</span>
<span class="s2">a 0.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 2.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.rsub(df.b)</span>
<span class="s2">a 0.0</span>
<span class="s2">b NaN</span>
<span class="s2">c -2.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_mul_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.multiply(df.b)</span>
<span class="s2">a 4.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 8.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.rmul(df.b)</span>
<span class="s2">a 4.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 8.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_div_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.divide(df.b)</span>
<span class="s2">a 1.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 2.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.rdiv(df.b)</span>
<span class="s2">a 1.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 0.5</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_pow_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.pow(df.b)</span>
<span class="s2">a 4.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 16.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.rpow(df.b)</span>
<span class="s2">a 4.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 16.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_mod_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.mod(df.b)</span>
<span class="s2">a 0.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 0.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.rmod(df.b)</span>
<span class="s2">a 0.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 2.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_floordiv_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.floordiv(df.b)</span>
<span class="s2">a 1.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 2.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.rfloordiv(df.b)</span>
<span class="s2">a 1.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 0.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="c1"># Needed to disambiguate Series.str and str type</span>
<span class="n">str_type</span> <span class="o">=</span> <span class="nb">str</span>
<div class="viewcode-block" id="Series"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.html#pyspark.pandas.Series">[docs]</a><span class="k">class</span> <span class="nc">Series</span><span class="p">(</span><span class="n">Frame</span><span class="p">,</span> <span class="n">IndexOpsMixin</span><span class="p">,</span> <span class="n">Generic</span><span class="p">[</span><span class="n">T</span><span class="p">]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> pandas-on-Spark Series that logically corresponds to a pandas Series. It holds a Spark Column</span>
<span class="sd"> internally.</span>
<span class="sd"> :ivar _internal: an internal immutable Frame to manage metadata.</span>
<span class="sd"> :type _internal: InternalFrame</span>
<span class="sd"> :ivar _psdf: Parent&#39;s pandas-on-Spark DataFrame</span>
<span class="sd"> :type _psdf: ps.DataFrame</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> data : array-like, dict, or scalar value, pandas Series</span>
<span class="sd"> Contains data stored in Series</span>
<span class="sd"> Note that if `data` is a pandas Series, other arguments should not be used.</span>
<span class="sd"> index : array-like or Index (1d)</span>
<span class="sd"> Values must be hashable and have the same length as `data`.</span>
<span class="sd"> Non-unique index values are allowed. Will default to</span>
<span class="sd"> RangeIndex (0, 1, 2, ..., n) if not provided. If both a dict and index</span>
<span class="sd"> sequence are used, the index will override the keys found in the</span>
<span class="sd"> dict.</span>
<span class="sd"> dtype : numpy.dtype or None</span>
<span class="sd"> If None, dtype will be inferred</span>
<span class="sd"> copy : boolean, default False</span>
<span class="sd"> Copy input data</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> <span class="c1"># type: ignore[no-untyped-def]</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">fastpath</span><span class="o">=</span><span class="kc">False</span>
<span class="p">):</span>
<span class="k">assert</span> <span class="n">data</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_anchor</span><span class="p">:</span> <span class="n">DataFrame</span> <span class="c1"># type: ignore[annotation-unchecked]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_col_label</span><span class="p">:</span> <span class="n">Label</span> <span class="c1"># type: ignore[annotation-unchecked]</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">):</span>
<span class="k">assert</span> <span class="n">dtype</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="k">assert</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="k">assert</span> <span class="ow">not</span> <span class="n">copy</span>
<span class="k">assert</span> <span class="ow">not</span> <span class="n">fastpath</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_anchor</span> <span class="o">=</span> <span class="n">data</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_col_label</span> <span class="o">=</span> <span class="n">index</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span>
<span class="k">assert</span> <span class="n">index</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="k">assert</span> <span class="n">dtype</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="k">assert</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="k">assert</span> <span class="ow">not</span> <span class="n">copy</span>
<span class="k">assert</span> <span class="ow">not</span> <span class="n">fastpath</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">data</span>
<span class="k">else</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.indexes.base</span> <span class="kn">import</span> <span class="n">Index</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">index</span><span class="p">,</span> <span class="n">Index</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;The given index cannot be a pandas-on-Spark index. &quot;</span>
<span class="s2">&quot;Try pandas index or array-like.&quot;</span>
<span class="p">)</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span>
<span class="n">data</span><span class="o">=</span><span class="n">data</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="n">copy</span><span class="p">,</span> <span class="n">fastpath</span><span class="o">=</span><span class="n">fastpath</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="o">.</span><span class="n">from_pandas</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">s</span><span class="p">))</span>
<span class="k">if</span> <span class="n">s</span><span class="o">.</span><span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">column_labels</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">])</span>
<span class="n">anchor</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_anchor</span> <span class="o">=</span> <span class="n">anchor</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_col_label</span> <span class="o">=</span> <span class="n">anchor</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="nb">object</span><span class="o">.</span><span class="fm">__setattr__</span><span class="p">(</span><span class="n">anchor</span><span class="p">,</span> <span class="s2">&quot;_psseries&quot;</span><span class="p">,</span> <span class="p">{</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">:</span> <span class="bp">self</span><span class="p">})</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">_psdf</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_anchor</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">_internal</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">InternalFrame</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">select_column</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">_column_label</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Label</span><span class="p">]:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_col_label</span>
<span class="k">def</span> <span class="nf">_update_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">psdf</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">assert</span> <span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span> <span class="o">==</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">],</span> <span class="p">(</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">,</span>
<span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">],</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_anchor</span> <span class="o">=</span> <span class="n">psdf</span>
<span class="nb">object</span><span class="o">.</span><span class="fm">__setattr__</span><span class="p">(</span><span class="n">psdf</span><span class="p">,</span> <span class="s2">&quot;_psseries&quot;</span><span class="p">,</span> <span class="p">{</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">:</span> <span class="bp">self</span><span class="p">})</span>
<span class="k">def</span> <span class="nf">_with_new_scol</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">scol</span><span class="p">:</span> <span class="n">PySparkColumn</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">InternalField</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Copy pandas-on-Spark Series with the new Spark Column.</span>
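<span class="sd"> :param scol: the new Spark Column</span>
<span class="sd"> :param field: optional InternalField for the new column; if it has a struct field,</span>
<span class="sd"> a copy renamed to this Series&#39; column label is used</span>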
<span class="sd"> :return: the copied Series</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">name</span> <span class="o">=</span> <span class="n">name_like_string</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name</span><span class="p">)],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span>
<span class="n">field</span> <span class="k">if</span> <span class="n">field</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">field</span><span class="o">.</span><span class="n">struct_field</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">field</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">)</span>
<span class="p">],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="n">spark</span><span class="p">:</span> <span class="s2">&quot;SparkIndexOpsMethods&quot;</span> <span class="o">=</span> <span class="n">CachedAccessor</span><span class="p">(</span> <span class="c1"># type: ignore[assignment]</span>
<span class="s2">&quot;spark&quot;</span><span class="p">,</span> <span class="n">SparkSeriesMethods</span>
<span class="p">)</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">dtypes</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dtype</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Return the dtype object of the underlying data.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(list(&#39;abc&#39;))</span>
<span class="sd"> &gt;&gt;&gt; s.dtype == s.dtypes</span>
<span class="sd"> True</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">axes</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="s2">&quot;Index&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a list of the row axis labels.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; psser.axes</span>
<span class="sd"> [Index([0, 1, 2], dtype=&#39;int64&#39;)]</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">]</span>
<span class="c1"># Arithmetic Operators</span>
<div class="viewcode-block" id="Series.add"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.add.html#pyspark.pandas.Series.add">[docs]</a> <span class="k">def</span> <span class="nf">add</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="n">fill_value</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="n">fill_value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">float</span><span class="p">)):</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">isNull</span><span class="p">()</span> <span class="o">|</span> <span class="n">F</span><span class="o">.</span><span class="n">isnan</span><span class="p">(</span><span class="n">scol</span><span class="p">),</span> <span class="n">fill_value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="bp">self</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span>
<span class="s2">&quot;`fill_value` currently only works when type of `other` is in (int, str, float)&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">+</span> <span class="n">other</span></div>
<span class="n">add</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Addition&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;+&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series + other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;radd&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_add_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.radd"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.radd.html#pyspark.pandas.Series.radd">[docs]</a> <span class="k">def</span> <span class="nf">radd</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="n">fill_value</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="n">fill_value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">float</span><span class="p">)):</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">isNull</span><span class="p">()</span> <span class="o">|</span> <span class="n">F</span><span class="o">.</span><span class="n">isnan</span><span class="p">(</span><span class="n">scol</span><span class="p">),</span> <span class="n">fill_value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="bp">self</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span>
<span class="s2">&quot;`fill_value` currently only works when type of `other` is in (int, str, float)&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">+</span> <span class="bp">self</span></div>
<span class="n">radd</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Addition&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;+&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other + series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;add&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_add_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.div"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.div.html#pyspark.pandas.Series.div">[docs]</a> <span class="k">def</span> <span class="nf">div</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">/</span> <span class="n">other</span></div>
<span class="n">div</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Floating division&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;/&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series / other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rdiv&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_div_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">divide</span> <span class="o">=</span> <span class="n">div</span>
<div class="viewcode-block" id="Series.rdiv"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rdiv.html#pyspark.pandas.Series.rdiv">[docs]</a> <span class="k">def</span> <span class="nf">rdiv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">/</span> <span class="bp">self</span></div>
<span class="n">rdiv</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Floating division&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;/&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other / series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;div&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_div_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.truediv"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.truediv.html#pyspark.pandas.Series.truediv">[docs]</a> <span class="k">def</span> <span class="nf">truediv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">/</span> <span class="n">other</span></div>
<span class="n">truediv</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Floating division&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;/&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series / other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rtruediv&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_div_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.rtruediv"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rtruediv.html#pyspark.pandas.Series.rtruediv">[docs]</a> <span class="k">def</span> <span class="nf">rtruediv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">/</span> <span class="bp">self</span></div>
<span class="n">rtruediv</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Floating division&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;/&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other / series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;truediv&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_div_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.mul"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.mul.html#pyspark.pandas.Series.mul">[docs]</a> <span class="k">def</span> <span class="nf">mul</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">*</span> <span class="n">other</span></div>
<span class="n">mul</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Multiplication&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;*&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series * other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rmul&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_mul_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">multiply</span> <span class="o">=</span> <span class="n">mul</span>
<div class="viewcode-block" id="Series.rmul"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rmul.html#pyspark.pandas.Series.rmul">[docs]</a> <span class="k">def</span> <span class="nf">rmul</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">*</span> <span class="bp">self</span></div>
<span class="n">rmul</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Multiplication&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;*&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other * series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;mul&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_mul_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.sub"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.sub.html#pyspark.pandas.Series.sub">[docs]</a> <span class="k">def</span> <span class="nf">sub</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">-</span> <span class="n">other</span></div>
<span class="n">sub</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Subtraction&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;-&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series - other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rsub&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_sub_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">subtract</span> <span class="o">=</span> <span class="n">sub</span>
<div class="viewcode-block" id="Series.rsub"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rsub.html#pyspark.pandas.Series.rsub">[docs]</a> <span class="k">def</span> <span class="nf">rsub</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">-</span> <span class="bp">self</span></div>
<span class="n">rsub</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Subtraction&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;-&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other - series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;sub&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_sub_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.mod"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.mod.html#pyspark.pandas.Series.mod">[docs]</a> <span class="k">def</span> <span class="nf">mod</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">%</span> <span class="n">other</span></div>
<span class="n">mod</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Modulo&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;%&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series % other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rmod&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_mod_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.rmod"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rmod.html#pyspark.pandas.Series.rmod">[docs]</a> <span class="k">def</span> <span class="nf">rmod</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">%</span> <span class="bp">self</span></div>
<span class="n">rmod</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Modulo&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;%&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other % series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;mod&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_mod_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.pow"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.pow.html#pyspark.pandas.Series.pow">[docs]</a> <span class="k">def</span> <span class="nf">pow</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">**</span><span class="n">other</span></div>
<span class="n">pow</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Exponential power of series&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;**&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series ** other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rpow&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_pow_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.rpow"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rpow.html#pyspark.pandas.Series.rpow">[docs]</a> <span class="k">def</span> <span class="nf">rpow</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span><span class="o">**</span><span class="bp">self</span></div>
<span class="n">rpow</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Exponential power&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;**&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other ** series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;pow&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_pow_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.floordiv"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.floordiv.html#pyspark.pandas.Series.floordiv">[docs]</a> <span class="k">def</span> <span class="nf">floordiv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">//</span> <span class="n">other</span></div>
<span class="n">floordiv</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Integer division&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;//&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series // other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rfloordiv&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_floordiv_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.rfloordiv"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rfloordiv.html#pyspark.pandas.Series.rfloordiv">[docs]</a> <span class="k">def</span> <span class="nf">rfloordiv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">//</span> <span class="bp">self</span></div>
<span class="n">rfloordiv</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Integer division&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;//&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other // series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;floordiv&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_floordiv_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<span class="c1"># create accessor for pandas-on-Spark specific methods.</span>
<span class="n">pandas_on_spark</span> <span class="o">=</span> <span class="n">CachedAccessor</span><span class="p">(</span><span class="s2">&quot;pandas_on_spark&quot;</span><span class="p">,</span> <span class="n">PandasOnSparkSeriesMethods</span><span class="p">)</span>
<span class="c1"># Comparison Operators</span>
<div class="viewcode-block" id="Series.eq"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.eq.html#pyspark.pandas.Series.eq">[docs]</a> <span class="k">def</span> <span class="nf">eq</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare if the current value is equal to the other.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4],</span>
<span class="sd"> ... &#39;b&#39;: [1, np.nan, 1, np.nan]},</span>
<span class="sd"> ... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df.a == 1</span>
<span class="sd"> a True</span>
<span class="sd"> b False</span>
<span class="sd"> c False</span>
<span class="sd"> d False</span>
<span class="sd"> Name: a, dtype: bool</span>
<span class="sd"> &gt;&gt;&gt; df.b.eq(1)</span>
<span class="sd"> a True</span>
<span class="sd"> b False</span>
<span class="sd"> c True</span>
<span class="sd"> d False</span>
<span class="sd"> Name: b, dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">==</span> <span class="n">other</span></div>
<span class="n">equals</span> <span class="o">=</span> <span class="n">eq</span>
<div class="viewcode-block" id="Series.gt"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.gt.html#pyspark.pandas.Series.gt">[docs]</a> <span class="k">def</span> <span class="nf">gt</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare if the current value is greater than the other.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4],</span>
<span class="sd"> ... &#39;b&#39;: [1, np.nan, 1, np.nan]},</span>
<span class="sd"> ... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df.a &gt; 1</span>
<span class="sd"> a False</span>
<span class="sd"> b True</span>
<span class="sd"> c True</span>
<span class="sd"> d True</span>
<span class="sd"> Name: a, dtype: bool</span>
<span class="sd"> &gt;&gt;&gt; df.b.gt(1)</span>
<span class="sd"> a False</span>
<span class="sd"> b False</span>
<span class="sd"> c False</span>
<span class="sd"> d False</span>
<span class="sd"> Name: b, dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">&gt;</span> <span class="n">other</span></div>
<div class="viewcode-block" id="Series.ge"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.ge.html#pyspark.pandas.Series.ge">[docs]</a> <span class="k">def</span> <span class="nf">ge</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare if the current value is greater than or equal to the other.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4],</span>
<span class="sd"> ... &#39;b&#39;: [1, np.nan, 1, np.nan]},</span>
<span class="sd"> ... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df.a &gt;= 2</span>
<span class="sd"> a False</span>
<span class="sd"> b True</span>
<span class="sd"> c True</span>
<span class="sd"> d True</span>
<span class="sd"> Name: a, dtype: bool</span>
<span class="sd"> &gt;&gt;&gt; df.b.ge(2)</span>
<span class="sd"> a False</span>
<span class="sd"> b False</span>
<span class="sd"> c False</span>
<span class="sd"> d False</span>
<span class="sd"> Name: b, dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">&gt;=</span> <span class="n">other</span></div>
<div class="viewcode-block" id="Series.lt"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.lt.html#pyspark.pandas.Series.lt">[docs]</a> <span class="k">def</span> <span class="nf">lt</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare if the current value is less than the other.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4],</span>
<span class="sd"> ... &#39;b&#39;: [1, np.nan, 1, np.nan]},</span>
<span class="sd"> ... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df.a &lt; 1</span>
<span class="sd"> a False</span>
<span class="sd"> b False</span>
<span class="sd"> c False</span>
<span class="sd"> d False</span>
<span class="sd"> Name: a, dtype: bool</span>
<span class="sd"> &gt;&gt;&gt; df.b.lt(2)</span>
<span class="sd"> a True</span>
<span class="sd"> b False</span>
<span class="sd"> c True</span>
<span class="sd"> d False</span>
<span class="sd"> Name: b, dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">&lt;</span> <span class="n">other</span></div>
<div class="viewcode-block" id="Series.le"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.le.html#pyspark.pandas.Series.le">[docs]</a> <span class="k">def</span> <span class="nf">le</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare if the current value is less than or equal to the other.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4],</span>
<span class="sd"> ... &#39;b&#39;: [1, np.nan, 1, np.nan]},</span>
<span class="sd"> ... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df.a &lt;= 2</span>
<span class="sd"> a True</span>
<span class="sd"> b True</span>
<span class="sd"> c False</span>
<span class="sd"> d False</span>
<span class="sd"> Name: a, dtype: bool</span>
<span class="sd"> &gt;&gt;&gt; df.b.le(2)</span>
<span class="sd"> a True</span>
<span class="sd"> b False</span>
<span class="sd"> c True</span>
<span class="sd"> d False</span>
<span class="sd"> Name: b, dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">&lt;=</span> <span class="n">other</span></div>
<div class="viewcode-block" id="Series.ne"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.ne.html#pyspark.pandas.Series.ne">[docs]</a> <span class="k">def</span> <span class="nf">ne</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare if the current value is not equal to the other.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4],</span>
<span class="sd"> ... &#39;b&#39;: [1, np.nan, 1, np.nan]},</span>
<span class="sd"> ... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df.a != 1</span>
<span class="sd"> a False</span>
<span class="sd"> b True</span>
<span class="sd"> c True</span>
<span class="sd"> d True</span>
<span class="sd"> Name: a, dtype: bool</span>
<span class="sd"> &gt;&gt;&gt; df.b.ne(1)</span>
<span class="sd"> a False</span>
<span class="sd"> b True</span>
<span class="sd"> c False</span>
<span class="sd"> d True</span>
<span class="sd"> Name: b, dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">!=</span> <span class="n">other</span></div>
<div class="viewcode-block" id="Series.divmod"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.divmod.html#pyspark.pandas.Series.divmod">[docs]</a> <span class="k">def</span> <span class="nf">divmod</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return Integer division and modulo of series and other, element-wise</span>
<span class="sd"> (binary operator `divmod`).</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series or scalar value</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> 2-Tuple of Series</span>
<span class="sd"> The result of the operation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.rdivmod</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">floordiv</span><span class="p">(</span><span class="n">other</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">mod</span><span class="p">(</span><span class="n">other</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.rdivmod"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rdivmod.html#pyspark.pandas.Series.rdivmod">[docs]</a> <span class="k">def</span> <span class="nf">rdivmod</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return Integer division and modulo of series and other, element-wise</span>
<span class="sd"> (binary operator `rdivmod`).</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series or scalar value</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> 2-Tuple of Series</span>
<span class="sd"> The result of the operation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.divmod</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">rfloordiv</span><span class="p">(</span><span class="n">other</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">rmod</span><span class="p">(</span><span class="n">other</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.between"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.between.html#pyspark.pandas.Series.between">[docs]</a> <span class="k">def</span> <span class="nf">between</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">left</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="n">right</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="n">inclusive</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;both&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return boolean Series equivalent to left &lt;= series &lt;= right.</span>
<span class="sd"> This function returns a boolean vector containing `True` wherever the</span>
<span class="sd"> corresponding Series element is between the boundary values `left` and</span>
<span class="sd"> `right`. NA values are treated as `False`.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> left : scalar or list-like</span>
<span class="sd"> Left boundary.</span>
<span class="sd"> right : scalar or list-like</span>
<span class="sd"> Right boundary.</span>
<span class="sd"> inclusive : {&quot;both&quot;, &quot;neither&quot;, &quot;left&quot;, &quot;right&quot;}</span>
<span class="sd"> Include boundaries. Whether to set each bound as closed or open.</span>
<span class="sd"> .. versionchanged:: 4.0.0</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series representing whether each element is between left and</span>
<span class="sd"> right (inclusive).</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.gt : Greater than of series and other.</span>
<span class="sd"> Series.lt : Less than of series and other.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> With the default ``inclusive=&quot;both&quot;``, this function is equivalent to</span>
<span class="sd"> ``(left &lt;= ser) &amp; (ser &lt;= right)``.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 0, 4, 8, np.nan])</span>
<span class="sd"> Boundary values are included by default:</span>
<span class="sd"> &gt;&gt;&gt; s.between(0, 4)</span>
<span class="sd"> 0 True</span>
<span class="sd"> 1 True</span>
<span class="sd"> 2 True</span>
<span class="sd"> 3 False</span>
<span class="sd"> 4 False</span>
<span class="sd"> dtype: bool</span>
<span class="sd"> With `inclusive` set to &quot;neither&quot; boundary values are excluded:</span>
<span class="sd"> &gt;&gt;&gt; s.between(0, 4, inclusive=&quot;neither&quot;)</span>
<span class="sd"> 0 True</span>
<span class="sd"> 1 False</span>
<span class="sd"> 2 False</span>
<span class="sd"> 3 False</span>
<span class="sd"> 4 False</span>
<span class="sd"> dtype: bool</span>
<span class="sd"> With `inclusive` set to &quot;right&quot; only right boundary value is included:</span>
<span class="sd"> &gt;&gt;&gt; s.between(0, 4, inclusive=&quot;right&quot;)</span>
<span class="sd"> 0 True</span>
<span class="sd"> 1 False</span>
<span class="sd"> 2 True</span>
<span class="sd"> 3 False</span>
<span class="sd"> 4 False</span>
<span class="sd"> dtype: bool</span>
<span class="sd"> With `inclusive` set to &quot;left&quot; only left boundary value is included:</span>
<span class="sd"> &gt;&gt;&gt; s.between(0, 4, inclusive=&quot;left&quot;)</span>
<span class="sd"> 0 True</span>
<span class="sd"> 1 True</span>
<span class="sd"> 2 False</span>
<span class="sd"> 3 False</span>
<span class="sd"> 4 False</span>
<span class="sd"> dtype: bool</span>
<span class="sd"> `left` and `right` can be any scalar value:</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;Alice&#39;, &#39;Bob&#39;, &#39;Carol&#39;, &#39;Eve&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s.between(&#39;Anna&#39;, &#39;Daniel&#39;)</span>
<span class="sd"> 0 False</span>
<span class="sd"> 1 True</span>
<span class="sd"> 2 True</span>
<span class="sd"> 3 False</span>
<span class="sd"> dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">inclusive</span> <span class="o">==</span> <span class="s2">&quot;both&quot;</span><span class="p">:</span>
<span class="n">lmask</span> <span class="o">=</span> <span class="bp">self</span> <span class="o">&gt;=</span> <span class="n">left</span>
<span class="n">rmask</span> <span class="o">=</span> <span class="bp">self</span> <span class="o">&lt;=</span> <span class="n">right</span>
<span class="k">elif</span> <span class="n">inclusive</span> <span class="o">==</span> <span class="s2">&quot;left&quot;</span><span class="p">:</span>
<span class="n">lmask</span> <span class="o">=</span> <span class="bp">self</span> <span class="o">&gt;=</span> <span class="n">left</span>
<span class="n">rmask</span> <span class="o">=</span> <span class="bp">self</span> <span class="o">&lt;</span> <span class="n">right</span>
<span class="k">elif</span> <span class="n">inclusive</span> <span class="o">==</span> <span class="s2">&quot;right&quot;</span><span class="p">:</span>
<span class="n">lmask</span> <span class="o">=</span> <span class="bp">self</span> <span class="o">&gt;</span> <span class="n">left</span>
<span class="n">rmask</span> <span class="o">=</span> <span class="bp">self</span> <span class="o">&lt;=</span> <span class="n">right</span>
<span class="k">elif</span> <span class="n">inclusive</span> <span class="o">==</span> <span class="s2">&quot;neither&quot;</span><span class="p">:</span>
<span class="n">lmask</span> <span class="o">=</span> <span class="bp">self</span> <span class="o">&gt;</span> <span class="n">left</span>
<span class="n">rmask</span> <span class="o">=</span> <span class="bp">self</span> <span class="o">&lt;</span> <span class="n">right</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;Inclusive has to be either string of &#39;both&#39;,&quot;</span> <span class="s2">&quot;&#39;left&#39;, &#39;right&#39;, or &#39;neither&#39;.&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">lmask</span> <span class="o">&amp;</span> <span class="n">rmask</span></div>
<div class="viewcode-block" id="Series.cov"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.cov.html#pyspark.pandas.Series.cov">[docs]</a> <span class="k">def</span> <span class="nf">cov</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">ddof</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compute covariance with Series, excluding missing values.</span>
<span class="sd"> .. versionadded:: 3.3.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series</span>
<span class="sd"> Series with which to compute the covariance.</span>
<span class="sd"> min_periods : int, optional</span>
<span class="sd"> Minimum number of observations needed to have a valid result.</span>
<span class="sd"> ddof : int, default 1</span>
<span class="sd"> Delta degrees of freedom. The divisor used in calculations</span>
<span class="sd"> is ``N - ddof``, where ``N`` represents the number of elements.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> float</span>
<span class="sd"> Covariance between Series and other.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.pandas.config import set_option, reset_option</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([0.90010907, 0.13484424, 0.62036035])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([0.12528585, 0.26962463, 0.51111198])</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.ops_on_diff_frames&quot;, True):</span>
<span class="sd"> ... s1.cov(s2)</span>
<span class="sd"> -0.016857...</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.ops_on_diff_frames&quot;, True):</span>
<span class="sd"> ... s1.cov(s2, ddof=2)</span>
<span class="sd"> -0.033715...</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Series</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;unsupported type: </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">))</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">np</span><span class="o">.</span><span class="n">issubdtype</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">number</span><span class="p">):</span> <span class="c1"># type: ignore[arg-type]</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;unsupported dtype: </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">np</span><span class="o">.</span><span class="n">issubdtype</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">number</span><span class="p">):</span> <span class="c1"># type: ignore[arg-type]</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;unsupported dtype: </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">other</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">ddof</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;ddof must be integer&quot;</span><span class="p">)</span>
<span class="n">min_periods</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">min_periods</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">min_periods</span>
<span class="k">if</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">combine_frames</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">(),</span> <span class="n">other</span><span class="o">.</span><span class="n">to_frame</span><span class="p">())</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_columns</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">dropna</span><span class="p">()</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sdf</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="n">min_periods</span><span class="p">))</span> <span class="o">&lt;</span> <span class="n">min_periods</span><span class="p">:</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">SF</span><span class="o">.</span><span class="n">covar</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">sdf</span><span class="o">.</span><span class="n">columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]),</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">sdf</span><span class="o">.</span><span class="n">columns</span><span class="p">[</span><span class="mi">1</span><span class="p">]),</span> <span class="n">ddof</span><span class="p">))</span>
<span class="k">return</span> <span class="n">sdf</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">1</span><span class="p">)[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span></div>
<span class="c1"># TODO: NaN and None when ``arg`` is an empty dict</span>
<span class="c1"># TODO: Support ps.Series ``arg``</span>
<div class="viewcode-block" id="Series.map"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.map.html#pyspark.pandas.Series.map">[docs]</a> <span class="k">def</span> <span class="nf">map</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">arg</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Dict</span><span class="p">,</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Any</span><span class="p">],</span> <span class="n">Any</span><span class="p">],</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">],</span> <span class="n">na_action</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Map values of Series according to input correspondence.</span>
<span class="sd"> Used for substituting each value in a Series with another value,</span>
<span class="sd"> that may be derived from a function, a ``dict`` or a ``pd.Series``.</span>
<span class="sd"> .. note:: make sure the size of the dictionary is not huge because it could</span>
<span class="sd"> degrade performance or throw OutOfMemoryError due to a huge</span>
<span class="sd"> expression within Spark. In this case, consider passing a function</span>
<span class="sd"> instead.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> arg : function, dict or pd.Series</span>
<span class="sd"> Mapping correspondence.</span>
<span class="sd"> na_action : {None, &#39;ignore&#39;}, default None</span>
<span class="sd"> If `ignore`, propagate NA values, without passing them to the mapping correspondence.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Same index as caller.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.apply : For applying more complex functions on a Series.</span>
<span class="sd"> DataFrame.applymap : Apply a function element-wise on a whole DataFrame.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> When ``arg`` is a dictionary, values in Series that are not in the</span>
<span class="sd"> dictionary (as keys) are converted to ``None``. However, if the</span>
<span class="sd"> dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.</span>
<span class="sd"> provides a method for default values), then this default is used</span>
<span class="sd"> rather than ``None``.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;cat&#39;, &#39;dog&#39;, None, &#39;rabbit&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 cat</span>
<span class="sd"> 1 dog</span>
<span class="sd"> 2 None</span>
<span class="sd"> 3 rabbit</span>
<span class="sd"> dtype: object</span>
<span class="sd"> ``map`` accepts a ``dict``. Values that are not found</span>
<span class="sd"> in the ``dict`` are converted to ``None``, unless the dict has a default</span>
<span class="sd"> value (e.g. ``defaultdict``):</span>
<span class="sd"> &gt;&gt;&gt; s.map({&#39;cat&#39;: &#39;kitten&#39;, &#39;dog&#39;: &#39;puppy&#39;})</span>
<span class="sd"> 0 kitten</span>
<span class="sd"> 1 puppy</span>
<span class="sd"> 2 None</span>
<span class="sd"> 3 None</span>
<span class="sd"> dtype: object</span>
<span class="sd"> It also accepts a pandas Series:</span>
<span class="sd"> &gt;&gt;&gt; pser = pd.Series([&#39;kitten&#39;, &#39;puppy&#39;], index=[&#39;cat&#39;, &#39;dog&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s.map(pser)</span>
<span class="sd"> 0 kitten</span>
<span class="sd"> 1 puppy</span>
<span class="sd"> 2 None</span>
<span class="sd"> 3 None</span>
<span class="sd"> dtype: object</span>
<span class="sd"> It also accepts a function:</span>
<span class="sd"> &gt;&gt;&gt; def format(x) -&gt; str:</span>
<span class="sd"> ... return &#39;I am a {}&#39;.format(x)</span>
<span class="sd"> &gt;&gt;&gt; s.map(format)</span>
<span class="sd"> 0 I am a cat</span>
<span class="sd"> 1 I am a dog</span>
<span class="sd"> 2 I am a None</span>
<span class="sd"> 3 I am a rabbit</span>
<span class="sd"> dtype: object</span>
<span class="sd"> To avoid applying the function to missing values (and keep them as None)</span>
<span class="sd"> na_action=&#39;ignore&#39; can be used:</span>
<span class="sd"> &gt;&gt;&gt; s.map(&#39;I am a {}&#39;.format, na_action=&#39;ignore&#39;)</span>
<span class="sd"> 0 I am a cat</span>
<span class="sd"> 1 I am a dog</span>
<span class="sd"> 2 None</span>
<span class="sd"> 3 I am a rabbit</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="p">(</span><span class="nb">dict</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">)):</span>
<span class="n">is_start</span> <span class="o">=</span> <span class="kc">True</span>
<span class="c1"># In case dictionary is empty.</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">False</span><span class="p">),</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">))</span>
<span class="k">for</span> <span class="n">to_replace</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">arg</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">is_start</span><span class="p">:</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">to_replace</span><span class="p">),</span> <span class="n">value</span><span class="p">)</span>
<span class="n">is_start</span> <span class="o">=</span> <span class="kc">False</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">current</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">to_replace</span><span class="p">),</span> <span class="n">value</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="s2">&quot;__missing__&quot;</span><span class="p">):</span>
<span class="n">tmp_val</span> <span class="o">=</span> <span class="n">arg</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">_NoValue</span><span class="p">]</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="c1"># Remove in case it&#39;s set in defaultdict.</span>
<span class="k">del</span> <span class="n">arg</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">_NoValue</span><span class="p">]</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">current</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">tmp_val</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">current</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">current</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">pandas_on_spark</span><span class="o">.</span><span class="n">transform_batch</span><span class="p">(</span><span class="k">lambda</span> <span class="n">pser</span><span class="p">:</span> <span class="n">pser</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">na_action</span><span class="p">))</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">shape</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Return a tuple of the shape of the underlying data.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">),)</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">name</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Name</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Return name of the Series.&quot;&quot;&quot;</span>
<span class="n">name</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span>
<span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">return</span> <span class="n">name</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">name</span>
<span class="nd">@name</span><span class="o">.</span><span class="n">setter</span>
<span class="k">def</span> <span class="nf">name</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="n">Name</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># TODO: Currently, changing index labels by passing a dictionary/Series is not supported.</span>
<div class="viewcode-block" id="Series.rename"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rename.html#pyspark.pandas.Series.rename">[docs]</a> <span class="k">def</span> <span class="nf">rename</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Any</span><span class="p">],</span> <span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Alter Series index labels or name.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> index : scalar or function, optional</span>
<span class="sd"> Functions are transformations to apply to the index.</span>
<span class="sd"> Scalar will alter the Series.name attribute.</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> Whether to return a new Series. If True then value of copy is</span>
<span class="sd"> ignored.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with index labels or name altered.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rename(&quot;my_name&quot;) # scalar, changes Series.name</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> Name: my_name, dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rename(lambda x: x ** 2) # function, changes labels</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 4 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">index</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">pass</span>
<span class="k">if</span> <span class="nb">callable</span><span class="p">(</span><span class="n">index</span><span class="p">):</span>
<span class="k">if</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;inplace&quot;</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;inplace True is not supported yet for a function &#39;index&#39;&quot;</span><span class="p">)</span>
<span class="n">frame</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span>
<span class="n">new_index_name</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">frame</span><span class="p">,</span> <span class="s2">&quot;__index_name__&quot;</span><span class="p">)</span>
<span class="n">frame</span><span class="p">[</span><span class="n">new_index_name</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
<span class="n">frame</span><span class="o">.</span><span class="n">set_index</span><span class="p">(</span><span class="n">new_index_name</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">frame</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">name</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">frame</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">index</span><span class="p">,</span> <span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;&#39;index&#39; of </span><span class="si">%s</span><span class="s2"> type is not supported yet&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">index</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
<span class="k">elif</span> <span class="ow">not</span> <span class="n">is_hashable</span><span class="p">(</span><span class="n">index</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;Series.name must be a hashable type&quot;</span><span class="p">)</span>
<span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">index</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">):</span>
<span class="n">index</span> <span class="o">=</span> <span class="p">(</span><span class="n">index</span><span class="p">,)</span>
<span class="n">name</span> <span class="o">=</span> <span class="n">name_like_string</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
<span class="n">field</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">column_labels</span><span class="o">=</span><span class="p">[</span><span class="n">index</span><span class="p">],</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol</span><span class="p">],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="n">field</span><span class="p">],</span>
<span class="n">column_label_names</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">psdf</span><span class="p">:</span> <span class="n">DataFrame</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">)</span>
<span class="k">if</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;inplace&quot;</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_col_label</span> <span class="o">=</span> <span class="n">index</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.rename_axis"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rename_axis.html#pyspark.pandas.Series.rename_axis">[docs]</a> <span class="k">def</span> <span class="nf">rename_axis</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">mapper</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">index</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Set the name of the axis for the index or columns.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> mapper, index : scalar, list-like, dict-like or function, optional</span>
<span class="sd"> A scalar, list-like, dict-like or function transformation to</span>
<span class="sd"> apply to the index values.</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> Modifies the object directly, instead of creating a new Series.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series, or None if `inplace` is True.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.rename : Alter Series index labels or name.</span>
<span class="sd"> DataFrame.rename : Alter DataFrame index labels or name.</span>
<span class="sd"> Index.rename : Set new names on index.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&quot;dog&quot;, &quot;cat&quot;, &quot;monkey&quot;], name=&quot;animal&quot;)</span>
<span class="sd"> &gt;&gt;&gt; s # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> 0 dog</span>
<span class="sd"> 1 cat</span>
<span class="sd"> 2 monkey</span>
<span class="sd"> Name: animal, dtype: object</span>
<span class="sd"> &gt;&gt;&gt; s.rename_axis(&quot;index&quot;).sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> index</span>
<span class="sd"> 0 dog</span>
<span class="sd"> 1 cat</span>
<span class="sd"> 2 monkey</span>
<span class="sd"> Name: animal, dtype: object</span>
<span class="sd"> **MultiIndex**</span>
<span class="sd"> &gt;&gt;&gt; index = pd.MultiIndex.from_product([[&#39;mammal&#39;],</span>
<span class="sd"> ... [&#39;dog&#39;, &#39;cat&#39;, &#39;monkey&#39;]],</span>
<span class="sd"> ... names=[&#39;type&#39;, &#39;name&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 4, 2], index=index, name=&#39;num_legs&#39;)</span>
<span class="sd"> &gt;&gt;&gt; s # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> type name</span>
<span class="sd"> mammal dog 4</span>
<span class="sd"> cat 4</span>
<span class="sd"> monkey 2</span>
<span class="sd"> Name: num_legs, dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rename_axis(index={&#39;type&#39;: &#39;class&#39;}).sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> class name</span>
<span class="sd"> mammal cat 4</span>
<span class="sd"> dog 4</span>
<span class="sd"> monkey 2</span>
<span class="sd"> Name: num_legs, dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rename_axis(index=str.upper).sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> TYPE NAME</span>
<span class="sd"> mammal cat 4</span>
<span class="sd"> dog 4</span>
<span class="sd"> monkey 2</span>
<span class="sd"> Name: num_legs, dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">(</span><span class="n">mapper</span><span class="o">=</span><span class="n">mapper</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">index</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;ps.Index&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;The index (axis labels) column of the Series.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Index</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">index</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">is_unique</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a boolean indicating whether the values in the object are unique.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> is_unique : boolean</span>
<span class="sd"> &gt;&gt;&gt; ps.Series([1, 2, 3]).is_unique</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; ps.Series([1, 2, 2]).is_unique</span>
<span class="sd"> False</span>
<span class="sd"> &gt;&gt;&gt; ps.Series([1, 2, 3, None]).is_unique</span>
<span class="sd"> True</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="c1"># Here we check:</span>
<span class="c1"># 1. the distinct count without nulls and count without nulls for non-null values</span>
<span class="c1"># 2. count null values and see if null is a distinct value.</span>
<span class="c1">#</span>
<span class="c1"># This workaround is to calculate the distinct count including nulls in</span>
<span class="c1"># a single pass. Note that COUNT(DISTINCT expr) in Spark is designed to ignore nulls.</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">countDistinct</span><span class="p">(</span><span class="n">scol</span><span class="p">))</span>
<span class="o">&amp;</span> <span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span> <span class="o">&lt;=</span> <span class="mi">1</span><span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span>
<div class="viewcode-block" id="Series.reset_index"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.reset_index.html#pyspark.pandas.Series.reset_index">[docs]</a> <span class="k">def</span> <span class="nf">reset_index</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">level</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Name</span><span class="p">,</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Name</span><span class="p">]]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">drop</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Name</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generate a new DataFrame or Series with the index reset.</span>
<span class="sd"> This is useful when the index needs to be treated as a column,</span>
<span class="sd"> or when the index is meaningless and needs to be reset</span>
<span class="sd"> to the default before another operation.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> level : int, str, tuple, or list, optional</span>
<span class="sd"> For a Series with a MultiIndex, only remove the specified levels from the index.</span>
<span class="sd"> Removes all levels by default.</span>
<span class="sd"> drop : bool, default False</span>
<span class="sd"> Just reset the index, without inserting it as a column in the new DataFrame.</span>
<span class="sd"> name : object, optional</span>
<span class="sd"> The name to use for the column containing the original Series values.</span>
<span class="sd"> Uses self.name by default. This argument is ignored when drop is True.</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> Modify the Series in place (do not create a new object).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> When `drop` is False (the default), a DataFrame is returned.</span>
<span class="sd"> The newly created columns will come first in the DataFrame,</span>
<span class="sd"> followed by the original Series values.</span>
<span class="sd"> When `drop` is True, a `Series` is returned.</span>
<span class="sd"> In either case, if ``inplace=True``, no value is returned.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4], index=pd.Index([&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], name=&#39;idx&#39;))</span>
<span class="sd"> Generate a DataFrame with default index.</span>
<span class="sd"> &gt;&gt;&gt; s.reset_index()</span>
<span class="sd"> idx 0</span>
<span class="sd"> 0 a 1</span>
<span class="sd"> 1 b 2</span>
<span class="sd"> 2 c 3</span>
<span class="sd"> 3 d 4</span>
<span class="sd"> To specify the name of the new column use `name`.</span>
<span class="sd"> &gt;&gt;&gt; s.reset_index(name=&#39;values&#39;)</span>
<span class="sd"> idx values</span>
<span class="sd"> 0 a 1</span>
<span class="sd"> 1 b 2</span>
<span class="sd"> 2 c 3</span>
<span class="sd"> 3 d 4</span>
<span class="sd"> To generate a new Series with the default index, set `drop` to True.</span>
<span class="sd"> &gt;&gt;&gt; s.reset_index(drop=True)</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> To update the Series in place, without generating a new one,</span>
<span class="sd"> set `inplace` to True. Note that it also requires ``drop=True``.</span>
<span class="sd"> &gt;&gt;&gt; s.reset_index(inplace=True, drop=True)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">inplace</span> <span class="o">=</span> <span class="n">validate_bool_kwarg</span><span class="p">(</span><span class="n">inplace</span><span class="p">,</span> <span class="s2">&quot;inplace&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">drop</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;Cannot reset_index inplace on a Series to create a DataFrame&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">drop</span><span class="p">:</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">psser</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">psser</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="n">psdf</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="n">level</span><span class="p">,</span> <span class="n">drop</span><span class="o">=</span><span class="n">drop</span><span class="p">)</span>
<span class="k">if</span> <span class="n">drop</span><span class="p">:</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">psdf</span></div>
<div class="viewcode-block" id="Series.to_frame"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_frame.html#pyspark.pandas.Series.to_frame">[docs]</a> <span class="k">def</span> <span class="nf">to_frame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Name</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Convert Series to DataFrame.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> name : object, default None</span>
<span class="sd"> The passed name should substitute for the series name (if it has</span>
<span class="sd"> one).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> DataFrame</span>
<span class="sd"> DataFrame representation of Series.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])</span>
<span class="sd"> &gt;&gt;&gt; s.to_frame()</span>
<span class="sd"> 0</span>
<span class="sd"> 0 a</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 c</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;], name=&quot;vals&quot;)</span>
<span class="sd"> &gt;&gt;&gt; s.to_frame()</span>
<span class="sd"> vals</span>
<span class="sd"> 0 a</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 c</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">renamed</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">renamed</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">DEFAULT_SERIES_NAME</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">renamed</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">renamed</span><span class="o">.</span><span class="n">_internal</span><span class="p">)</span></div>
<span class="n">to_dataframe</span> <span class="o">=</span> <span class="n">to_frame</span>
<div class="viewcode-block" id="Series.to_string"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_string.html#pyspark.pandas.Series.to_string">[docs]</a> <span class="k">def</span> <span class="nf">to_string</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">buf</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">IO</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">na_rep</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;NaN&quot;</span><span class="p">,</span>
<span class="n">float_format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Callable</span><span class="p">[[</span><span class="nb">float</span><span class="p">],</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">header</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">index</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">length</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">dtype</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">name</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">max_rows</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Render a string representation of the Series.</span>
<span class="sd"> .. note:: This method should only be used if the resulting pandas object is expected</span>
<span class="sd"> to be small, as all the data is loaded into the driver&#39;s memory. If the input</span>
<span class="sd"> is large, set max_rows parameter.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> buf : StringIO-like, optional</span>
<span class="sd"> buffer to write to</span>
<span class="sd"> na_rep : string, optional</span>
<span class="sd"> string representation of NaN to use, default &#39;NaN&#39;</span>
<span class="sd"> float_format : one-parameter function, optional</span>
<span class="sd"> formatter function to apply to columns&#39; elements if they are floats</span>
<span class="sd"> default None</span>
<span class="sd"> header : boolean, default True</span>
<span class="sd"> Add the Series header (index name)</span>
<span class="sd"> index : bool, optional</span>
<span class="sd"> Add index (row) labels, default True</span>
<span class="sd"> length : boolean, default False</span>
<span class="sd"> Add the Series length</span>
<span class="sd"> dtype : boolean, default False</span>
<span class="sd"> Add the Series dtype</span>
<span class="sd"> name : boolean, default False</span>
<span class="sd"> Add the Series name if not None</span>
<span class="sd"> max_rows : int, optional</span>
<span class="sd"> Maximum number of rows to show before truncating. If None, show</span>
<span class="sd"> all.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> formatted : string (if no buffer is passed)</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)], columns=[&#39;dogs&#39;, &#39;cats&#39;])</span>
<span class="sd"> &gt;&gt;&gt; print(df[&#39;dogs&#39;].to_string())</span>
<span class="sd"> 0 0.2</span>
<span class="sd"> 1 0.0</span>
<span class="sd"> 2 0.6</span>
<span class="sd"> 3 0.2</span>
<span class="sd"> &gt;&gt;&gt; print(df[&#39;dogs&#39;].to_string(max_rows=2))</span>
<span class="sd"> 0 0.2</span>
<span class="sd"> 1 0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Make sure locals() call is at the top of the function so we don&#39;t capture local variables.</span>
<span class="n">args</span> <span class="o">=</span> <span class="nb">locals</span><span class="p">()</span>
<span class="k">if</span> <span class="n">max_rows</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">psseries</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="n">max_rows</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">psseries</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">return</span> <span class="n">validate_arguments_and_invoke_function</span><span class="p">(</span>
<span class="n">psseries</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_string</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="o">.</span><span class="n">to_string</span><span class="p">,</span> <span class="n">args</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="Series.to_clipboard"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_clipboard.html#pyspark.pandas.Series.to_clipboard">[docs]</a> <span class="k">def</span> <span class="nf">to_clipboard</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">excel</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">sep</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># Docstring defined below by reusing DataFrame.to_clipboard&#39;s.</span>
<span class="n">args</span> <span class="o">=</span> <span class="nb">locals</span><span class="p">()</span>
<span class="n">psseries</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">return</span> <span class="n">validate_arguments_and_invoke_function</span><span class="p">(</span>
<span class="n">psseries</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_clipboard</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="o">.</span><span class="n">to_clipboard</span><span class="p">,</span> <span class="n">args</span>
<span class="p">)</span></div>
<span class="n">to_clipboard</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="o">.</span><span class="n">to_clipboard</span><span class="o">.</span><span class="vm">__doc__</span>
<div class="viewcode-block" id="Series.to_dict"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_dict.html#pyspark.pandas.Series.to_dict">[docs]</a> <span class="k">def</span> <span class="nf">to_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">into</span><span class="p">:</span> <span class="n">Type</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Mapping</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Convert Series to {label -&gt; value} dict or dict-like object.</span>
<span class="sd"> .. note:: This method should only be used if the resulting dict-like object is expected</span>
<span class="sd"> to be small, as all the data is loaded into the driver&#39;s memory.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> into : class, default dict</span>
<span class="sd"> The collections.abc.Mapping subclass to use as the return</span>
<span class="sd"> object. Can be the actual class or an empty</span>
<span class="sd"> instance of the mapping type you want. If you want a</span>
<span class="sd"> collections.defaultdict, you must pass it initialized.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> collections.abc.Mapping</span>
<span class="sd"> Key-value representation of Series.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s_dict = s.to_dict()</span>
<span class="sd"> &gt;&gt;&gt; sorted(s_dict.items())</span>
<span class="sd"> [(0, 1), (1, 2), (2, 3), (3, 4)]</span>
<span class="sd"> &gt;&gt;&gt; from collections import OrderedDict, defaultdict</span>
<span class="sd"> &gt;&gt;&gt; s.to_dict(OrderedDict)</span>
<span class="sd"> OrderedDict(...)</span>
<span class="sd"> &gt;&gt;&gt; dd = defaultdict(list)</span>
<span class="sd"> &gt;&gt;&gt; s.to_dict(dd) # doctest: +ELLIPSIS</span>
<span class="sd"> defaultdict(&lt;class &#39;list&#39;&gt;, {...})</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Make sure locals() call is at the top of the function so we don&#39;t capture local variables.</span>
<span class="n">args</span> <span class="o">=</span> <span class="nb">locals</span><span class="p">()</span>
<span class="n">psseries</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">return</span> <span class="n">validate_arguments_and_invoke_function</span><span class="p">(</span>
<span class="n">psseries</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_dict</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="o">.</span><span class="n">to_dict</span><span class="p">,</span> <span class="n">args</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="Series.to_latex"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_latex.html#pyspark.pandas.Series.to_latex">[docs]</a> <span class="k">def</span> <span class="nf">to_latex</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">buf</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">IO</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">columns</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">Name</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">header</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">index</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">na_rep</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;NaN&quot;</span><span class="p">,</span>
<span class="n">formatters</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span>
<span class="n">Union</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">Callable</span><span class="p">[[</span><span class="n">Any</span><span class="p">],</span> <span class="nb">str</span><span class="p">]],</span> <span class="n">Dict</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Any</span><span class="p">],</span> <span class="nb">str</span><span class="p">]]]</span>
<span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">float_format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Callable</span><span class="p">[[</span><span class="nb">float</span><span class="p">],</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">sparsify</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">index_names</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">bold_rows</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">column_format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">longtable</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">escape</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">encoding</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">decimal</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;.&quot;</span><span class="p">,</span>
<span class="n">multicolumn</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">multicolumn_format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">multirow</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="n">args</span> <span class="o">=</span> <span class="nb">locals</span><span class="p">()</span>
<span class="n">psseries</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">return</span> <span class="n">validate_arguments_and_invoke_function</span><span class="p">(</span>
<span class="n">psseries</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_latex</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="o">.</span><span class="n">to_latex</span><span class="p">,</span> <span class="n">args</span>
<span class="p">)</span></div>
<span class="n">to_latex</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="o">.</span><span class="n">to_latex</span><span class="o">.</span><span class="vm">__doc__</span>
<div class="viewcode-block" id="Series.to_pandas"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_pandas.html#pyspark.pandas.Series.to_pandas">[docs]</a> <span class="k">def</span> <span class="nf">to_pandas</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a pandas Series.</span>
<span class="sd"> .. note:: This method should only be used if the resulting pandas object is expected</span>
<span class="sd"> to be small, as all the data is loaded into the driver&#39;s memory.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)], columns=[&#39;dogs&#39;, &#39;cats&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df[&#39;dogs&#39;].to_pandas()</span>
<span class="sd"> 0 0.2</span>
<span class="sd"> 1 0.0</span>
<span class="sd"> 2 0.6</span>
<span class="sd"> 3 0.2</span>
<span class="sd"> Name: dogs, dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">log_advice</span><span class="p">(</span>
<span class="s2">&quot;`to_pandas` loads all data into the driver&#39;s memory. &quot;</span>
<span class="s2">&quot;It should only be used if the resulting pandas Series is expected to be small.&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_to_pandas</span><span class="p">()</span></div>
<span class="k">def</span> <span class="nf">_to_pandas</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Same as `to_pandas()`, but without issuing the advice log; intended for internal usage.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<div class="viewcode-block" id="Series.to_list"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_list.html#pyspark.pandas.Series.to_list">[docs]</a> <span class="k">def</span> <span class="nf">to_list</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a list of the values.</span>
<span class="sd"> These are each a scalar type, which is a Python scalar</span>
<span class="sd"> (for str, int, float) or a pandas scalar</span>
<span class="sd"> (for Timestamp/Timedelta/Interval/Period).</span>
<span class="sd"> .. note:: This method should only be used if the resulting list is expected</span>
<span class="sd"> to be small, as all the data is loaded into the driver&#39;s memory.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">log_advice</span><span class="p">(</span>
<span class="s2">&quot;`to_list` loads all data into the driver&#39;s memory. &quot;</span>
<span class="s2">&quot;It should only be used if the resulting list is expected to be small.&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span></div>
<span class="n">tolist</span> <span class="o">=</span> <span class="n">to_list</span>
<div class="viewcode-block" id="Series.duplicated"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.duplicated.html#pyspark.pandas.Series.duplicated">[docs]</a> <span class="k">def</span> <span class="nf">duplicated</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">keep</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="s2">&quot;first&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Indicate duplicate Series values.</span>
<span class="sd"> Duplicated values are indicated as ``True`` values in the resulting</span>
<span class="sd"> Series. Either all duplicates, all except the first or all except the</span>
<span class="sd"> last occurrence of duplicates can be indicated.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> keep : {&#39;first&#39;, &#39;last&#39;, False}, default &#39;first&#39;</span>
<span class="sd"> Method to handle marking duplicates:</span>
<span class="sd"> - &#39;first&#39; : Mark duplicates as ``True`` except for the first occurrence.</span>
<span class="sd"> - &#39;last&#39; : Mark duplicates as ``True`` except for the last occurrence.</span>
<span class="sd"> - ``False`` : Mark all duplicates as ``True``.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series indicating whether each value has occurred in the</span>
<span class="sd"> preceding values</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Index.drop_duplicates : Remove duplicate values from Index.</span>
<span class="sd"> DataFrame.duplicated : Equivalent method on DataFrame.</span>
<span class="sd"> Series.drop_duplicates : Remove duplicate values from Series.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> By default, for each set of duplicated values, the first occurrence is</span>
<span class="sd"> set to False and all others to True:</span>
<span class="sd"> &gt;&gt;&gt; animals = ps.Series([&#39;lama&#39;, &#39;cow&#39;, &#39;lama&#39;, &#39;beetle&#39;, &#39;lama&#39;])</span>
<span class="sd"> &gt;&gt;&gt; animals.duplicated().sort_index()</span>
<span class="sd"> 0 False</span>
<span class="sd"> 1 False</span>
<span class="sd"> 2 True</span>
<span class="sd"> 3 False</span>
<span class="sd"> 4 True</span>
<span class="sd"> dtype: bool</span>
<span class="sd"> which is equivalent to</span>
<span class="sd"> &gt;&gt;&gt; animals.duplicated(keep=&#39;first&#39;).sort_index()</span>
<span class="sd"> 0 False</span>
<span class="sd"> 1 False</span>
<span class="sd"> 2 True</span>
<span class="sd"> 3 False</span>
<span class="sd"> 4 True</span>
<span class="sd"> dtype: bool</span>
<span class="sd"> By using &#39;last&#39;, the last occurrence of each set of duplicated values</span>
<span class="sd"> is set to False and all others to True:</span>
<span class="sd"> &gt;&gt;&gt; animals.duplicated(keep=&#39;last&#39;).sort_index()</span>
<span class="sd"> 0 True</span>
<span class="sd"> 1 False</span>
<span class="sd"> 2 True</span>
<span class="sd"> 3 False</span>
<span class="sd"> 4 False</span>
<span class="sd"> dtype: bool</span>
<span class="sd"> By setting keep to ``False``, all duplicates are True:</span>
<span class="sd"> &gt;&gt;&gt; animals.duplicated(keep=False).sort_index()</span>
<span class="sd"> 0 True</span>
<span class="sd"> 1 False</span>
<span class="sd"> 2 True</span>
<span class="sd"> 3 False</span>
<span class="sd"> 4 True</span>
<span class="sd"> dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]]</span><span class="o">.</span><span class="n">duplicated</span><span class="p">(</span><span class="n">keep</span><span class="o">=</span><span class="n">keep</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.drop_duplicates"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.drop_duplicates.html#pyspark.pandas.Series.drop_duplicates">[docs]</a> <span class="k">def</span> <span class="nf">drop_duplicates</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">keep</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="s2">&quot;first&quot;</span><span class="p">,</span> <span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return Series with duplicate values removed.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> keep : {&#39;first&#39;, &#39;last&#39;, ``False``}, default &#39;first&#39;</span>
<span class="sd"> Method to handle dropping duplicates:</span>
<span class="sd"> - &#39;first&#39; : Drop duplicates except for the first occurrence.</span>
<span class="sd"> - &#39;last&#39; : Drop duplicates except for the last occurrence.</span>
<span class="sd"> - ``False`` : Drop all duplicates.</span>
<span class="sd"> inplace : bool, default ``False``</span>
<span class="sd"> If ``True``, performs operation inplace and returns None.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or None</span>
<span class="sd"> Series with duplicates dropped, or None if ``inplace=True``.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Generate a Series with duplicated entries.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;lama&#39;, &#39;cow&#39;, &#39;lama&#39;, &#39;beetle&#39;, &#39;lama&#39;, &#39;hippo&#39;],</span>
<span class="sd"> ... name=&#39;animal&#39;)</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 0 lama</span>
<span class="sd"> 1 cow</span>
<span class="sd"> 2 lama</span>
<span class="sd"> 3 beetle</span>
<span class="sd"> 4 lama</span>
<span class="sd"> 5 hippo</span>
<span class="sd"> Name: animal, dtype: object</span>
<span class="sd"> With the &#39;keep&#39; parameter, the selection behavior of duplicated values</span>
<span class="sd"> can be changed. The value &#39;first&#39; keeps the first occurrence for each</span>
<span class="sd"> set of duplicated entries. The default value of keep is &#39;first&#39;.</span>
<span class="sd"> &gt;&gt;&gt; s.drop_duplicates().sort_index()</span>
<span class="sd"> 0 lama</span>
<span class="sd"> 1 cow</span>
<span class="sd"> 3 beetle</span>
<span class="sd"> 5 hippo</span>
<span class="sd"> Name: animal, dtype: object</span>
<span class="sd"> The value &#39;last&#39; for parameter &#39;keep&#39; keeps the last occurrence for</span>
<span class="sd"> each set of duplicated entries.</span>
<span class="sd"> &gt;&gt;&gt; s.drop_duplicates(keep=&#39;last&#39;).sort_index()</span>
<span class="sd"> 1 cow</span>
<span class="sd"> 3 beetle</span>
<span class="sd"> 4 lama</span>
<span class="sd"> 5 hippo</span>
<span class="sd"> Name: animal, dtype: object</span>
<span class="sd"> The value ``False`` for parameter &#39;keep&#39; discards all sets of</span>
<span class="sd"> duplicated entries. Setting the value of &#39;inplace&#39; to ``True`` performs</span>
<span class="sd"> the operation inplace and returns ``None``.</span>
<span class="sd"> &gt;&gt;&gt; s.drop_duplicates(keep=False, inplace=True)</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 1 cow</span>
<span class="sd"> 3 beetle</span>
<span class="sd"> 5 hippo</span>
<span class="sd"> Name: animal, dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">inplace</span> <span class="o">=</span> <span class="n">validate_bool_kwarg</span><span class="p">(</span><span class="n">inplace</span><span class="p">,</span> <span class="s2">&quot;inplace&quot;</span><span class="p">)</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]]</span><span class="o">.</span><span class="n">drop_duplicates</span><span class="p">(</span><span class="n">keep</span><span class="o">=</span><span class="n">keep</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.reindex"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.reindex.html#pyspark.pandas.Series.reindex">[docs]</a> <span class="k">def</span> <span class="nf">reindex</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">fill_value</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Conform Series to new index with optional filling logic, placing</span>
<span class="sd"> NA/NaN in locations having no value in the previous index. A new object</span>
<span class="sd"> is produced.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> index : array-like, optional</span>
<span class="sd"> New labels / index to conform to, should be specified using keywords.</span>
<span class="sd"> Preferably an Index object to avoid duplicating data.</span>
<span class="sd"> fill_value : scalar, default np.NaN</span>
<span class="sd"> Value to use for missing values. Defaults to NaN, but can be any</span>
<span class="sd"> &quot;compatible&quot; value.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series with changed index.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.reset_index : Remove row labels or move them to new columns.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Create a series with some fictional data.</span>
<span class="sd"> &gt;&gt;&gt; index = [&#39;Firefox&#39;, &#39;Chrome&#39;, &#39;Safari&#39;, &#39;IE10&#39;, &#39;Konqueror&#39;]</span>
<span class="sd"> &gt;&gt;&gt; ser = ps.Series([200, 200, 404, 404, 301],</span>
<span class="sd"> ... index=index, name=&#39;http_status&#39;)</span>
<span class="sd"> &gt;&gt;&gt; ser</span>
<span class="sd"> Firefox 200</span>
<span class="sd"> Chrome 200</span>
<span class="sd"> Safari 404</span>
<span class="sd"> IE10 404</span>
<span class="sd"> Konqueror 301</span>
<span class="sd"> Name: http_status, dtype: int64</span>
<span class="sd"> Create a new index and reindex the Series. By default</span>
<span class="sd"> values in the new index that do not have corresponding</span>
<span class="sd"> records in the Series are assigned ``NaN``.</span>
<span class="sd"> &gt;&gt;&gt; new_index= [&#39;Safari&#39;, &#39;Iceweasel&#39;, &#39;Comodo Dragon&#39;, &#39;IE10&#39;,</span>
<span class="sd"> ... &#39;Chrome&#39;]</span>
<span class="sd"> &gt;&gt;&gt; ser.reindex(new_index).sort_index()</span>
<span class="sd"> Chrome 200.0</span>
<span class="sd"> Comodo Dragon NaN</span>
<span class="sd"> IE10 404.0</span>
<span class="sd"> Iceweasel NaN</span>
<span class="sd"> Safari 404.0</span>
<span class="sd"> Name: http_status, dtype: float64</span>
<span class="sd"> We can fill in the missing values by passing a value to</span>
<span class="sd"> the keyword ``fill_value``.</span>
<span class="sd"> &gt;&gt;&gt; ser.reindex(new_index, fill_value=0).sort_index()</span>
<span class="sd"> Chrome 200</span>
<span class="sd"> Comodo Dragon 0</span>
<span class="sd"> IE10 404</span>
<span class="sd"> Iceweasel 0</span>
<span class="sd"> Safari 404</span>
<span class="sd"> Name: http_status, dtype: int64</span>
<span class="sd"> To further illustrate the filling functionality in</span>
<span class="sd"> ``reindex``, we will create a Series with a</span>
<span class="sd"> monotonically increasing index (for example, a sequence</span>
<span class="sd"> of dates).</span>
<span class="sd"> &gt;&gt;&gt; date_index = pd.date_range(&#39;1/1/2010&#39;, periods=6, freq=&#39;D&#39;)</span>
<span class="sd"> &gt;&gt;&gt; ser2 = ps.Series([100, 101, np.nan, 100, 89, 88],</span>
<span class="sd"> ... name=&#39;prices&#39;, index=date_index)</span>
<span class="sd"> &gt;&gt;&gt; ser2.sort_index()</span>
<span class="sd"> 2010-01-01 100.0</span>
<span class="sd"> 2010-01-02 101.0</span>
<span class="sd"> 2010-01-03 NaN</span>
<span class="sd"> 2010-01-04 100.0</span>
<span class="sd"> 2010-01-05 89.0</span>
<span class="sd"> 2010-01-06 88.0</span>
<span class="sd"> Name: prices, dtype: float64</span>
<span class="sd"> Suppose we decide to expand the series to cover a wider</span>
<span class="sd"> date range.</span>
<span class="sd"> &gt;&gt;&gt; date_index2 = pd.date_range(&#39;12/29/2009&#39;, periods=10, freq=&#39;D&#39;)</span>
<span class="sd"> &gt;&gt;&gt; ser2.reindex(date_index2).sort_index()</span>
<span class="sd"> 2009-12-29 NaN</span>
<span class="sd"> 2009-12-30 NaN</span>
<span class="sd"> 2009-12-31 NaN</span>
<span class="sd"> 2010-01-01 100.0</span>
<span class="sd"> 2010-01-02 101.0</span>
<span class="sd"> 2010-01-03 NaN</span>
<span class="sd"> 2010-01-04 100.0</span>
<span class="sd"> 2010-01-05 89.0</span>
<span class="sd"> 2010-01-06 88.0</span>
<span class="sd"> 2010-01-07 NaN</span>
<span class="sd"> Name: prices, dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">reindex</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="n">fill_value</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">name</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="Series.reindex_like"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.reindex_like.html#pyspark.pandas.Series.reindex_like">[docs]</a> <span class="k">def</span> <span class="nf">reindex_like</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a Series with matching indices as other object.</span>
<span class="sd"> Conform the object to the same index on all axes. Places NA/NaN in locations</span>
<span class="sd"> having no value in the previous index.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series or DataFrame</span>
<span class="sd"> Its row and column indices are used to define the new indices</span>
<span class="sd"> of this object.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with changed indices on each axis.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> DataFrame.set_index : Set row labels.</span>
<span class="sd"> DataFrame.reset_index : Remove row labels or move them to new columns.</span>
<span class="sd"> DataFrame.reindex : Change to new indices or expand indices.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Same as calling</span>
<span class="sd"> ``.reindex(index=other.index, ...)``.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([24.3, 31.0, 22.0, 35.0],</span>
<span class="sd"> ... index=pd.date_range(start=&#39;2014-02-12&#39;,</span>
<span class="sd"> ... end=&#39;2014-02-15&#39;, freq=&#39;D&#39;),</span>
<span class="sd"> ... name=&quot;temp_celsius&quot;)</span>
<span class="sd"> &gt;&gt;&gt; s1</span>
<span class="sd"> 2014-02-12 24.3</span>
<span class="sd"> 2014-02-13 31.0</span>
<span class="sd"> 2014-02-14 22.0</span>
<span class="sd"> 2014-02-15 35.0</span>
<span class="sd"> Name: temp_celsius, dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([&quot;low&quot;, &quot;low&quot;, &quot;medium&quot;],</span>
<span class="sd"> ... index=pd.DatetimeIndex([&#39;2014-02-12&#39;, &#39;2014-02-13&#39;,</span>
<span class="sd"> ... &#39;2014-02-15&#39;]),</span>
<span class="sd"> ... name=&quot;winspeed&quot;)</span>
<span class="sd"> &gt;&gt;&gt; s2</span>
<span class="sd"> 2014-02-12 low</span>
<span class="sd"> 2014-02-13 low</span>
<span class="sd"> 2014-02-15 medium</span>
<span class="sd"> Name: winspeed, dtype: object</span>
<span class="sd"> &gt;&gt;&gt; s2.reindex_like(s1).sort_index()</span>
<span class="sd"> 2014-02-12 low</span>
<span class="sd"> 2014-02-13 low</span>
<span class="sd"> 2014-02-14 None</span>
<span class="sd"> 2014-02-15 medium</span>
<span class="sd"> Name: winspeed, dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="p">(</span><span class="n">Series</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">)):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">reindex</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="n">other</span><span class="o">.</span><span class="n">index</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;other must be a pandas-on-Spark Series or DataFrame&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.fillna"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.fillna.html#pyspark.pandas.Series.fillna">[docs]</a> <span class="k">def</span> <span class="nf">fillna</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">value</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">method</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Fill NA/NaN values.</span>
<span class="sd"> .. note:: the current implementation of &#39;method&#39; parameter in fillna uses Spark&#39;s Window</span>
<span class="sd"> without specifying partition specification. This leads to moving all data into</span>
<span class="sd"> a single partition in a single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method with very large datasets.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> value : scalar, dict, Series</span>
<span class="sd"> Value to use to fill holes. Alternatively, a dict/Series of values</span>
<span class="sd"> specifying which value to use for each column.</span>
<span class="sd"> DataFrame is not supported.</span>
<span class="sd"> method : {&#39;backfill&#39;, &#39;bfill&#39;, &#39;pad&#39;, &#39;ffill&#39;, None}, default None</span>
<span class="sd"> Method to use for filling holes in reindexed Series.</span>
<span class="sd"> pad / ffill: propagate the last valid observation forward to the next valid one.</span>
<span class="sd"> backfill / bfill: use the NEXT valid observation to fill the gap.</span>
<span class="sd"> .. deprecated:: 4.0.0</span>
<span class="sd"> axis : {0 or `index`}</span>
<span class="sd"> 1 and `columns` are not supported.</span>
<span class="sd"> inplace : boolean, default False</span>
<span class="sd"> Fill in place (do not create a new object)</span>
<span class="sd"> limit : int, default None</span>
<span class="sd"> If method is specified, this is the maximum number of consecutive NaN values to</span>
<span class="sd"> forward/backward fill. In other words, if there is a gap with more than this number of</span>
<span class="sd"> consecutive NaNs, it will only be partially filled. If method is not specified,</span>
<span class="sd"> this is the maximum number of entries along the entire axis where NaNs will be filled.</span>
<span class="sd"> Must be greater than 0 if not None</span>
<span class="sd"> .. deprecated:: 4.0.0</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with NA entries filled.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([np.nan, 2, 3, 4, np.nan, 6], name=&#39;x&#39;)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 4 NaN</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> Name: x, dtype: float64</span>
<span class="sd"> Replace all NaN elements with 0s.</span>
<span class="sd"> &gt;&gt;&gt; s.fillna(0)</span>
<span class="sd"> 0 0.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 4 0.0</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> Name: x, dtype: float64</span>
<span class="sd"> We can also propagate non-null values forward or backward.</span>
<span class="sd"> &gt;&gt;&gt; s.fillna(method=&#39;ffill&#39;)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 4 4.0</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> Name: x, dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([np.nan, &#39;a&#39;, &#39;b&#39;, &#39;c&#39;, np.nan], name=&#39;x&#39;)</span>
<span class="sd"> &gt;&gt;&gt; s.fillna(method=&#39;ffill&#39;)</span>
<span class="sd"> 0 None</span>
<span class="sd"> 1 a</span>
<span class="sd"> 2 b</span>
<span class="sd"> 3 c</span>
<span class="sd"> 4 c</span>
<span class="sd"> Name: x, dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">psser</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="n">method</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="n">limit</span><span class="p">)</span>
<span class="k">if</span> <span class="n">method</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;Series.fillna with &#39;method&#39; is deprecated and will raise in a future version. &quot;</span>
<span class="s2">&quot;Use Series.ffill() or Series.bfill() instead.&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">psser</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span><span class="p">)</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">inplace</span> <span class="o">=</span> <span class="n">validate_bool_kwarg</span><span class="p">(</span><span class="n">inplace</span><span class="p">,</span> <span class="s2">&quot;inplace&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_update_internal_frame</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="p">,</span> <span class="n">check_same_anchor</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">psser</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span></div>
<span class="k">def</span> <span class="nf">_fillna</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">value</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">method</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">part_cols</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">(),</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span>
<span class="k">if</span> <span class="n">axis</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;fillna currently only works for axis=0 or axis=&#39;index&#39;&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="p">(</span><span class="n">value</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="n">method</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Must specify a fillna &#39;value&#39; or &#39;method&#39; parameter.&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="p">(</span><span class="n">method</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="n">method</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;ffill&quot;</span><span class="p">,</span> <span class="s2">&quot;pad&quot;</span><span class="p">,</span> <span class="s2">&quot;backfill&quot;</span><span class="p">,</span> <span class="s2">&quot;bfill&quot;</span><span class="p">]):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Expecting &#39;pad&#39;, &#39;ffill&#39;, &#39;backfill&#39; or &#39;bfill&#39;.&quot;</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">nullable</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="p">(</span><span class="n">FloatType</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">)</span>
<span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">cond</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">isnull</span><span class="p">()</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="k">if</span> <span class="n">value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="p">(</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">bool</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;Unsupported type </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
<span class="k">if</span> <span class="n">limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;limit parameter for value is not support now&quot;</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">method</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;ffill&quot;</span><span class="p">,</span> <span class="s2">&quot;pad&quot;</span><span class="p">]:</span>
<span class="n">func</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">last</span>
<span class="n">end</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span> <span class="o">-</span> <span class="mi">1</span>
<span class="k">if</span> <span class="n">limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">begin</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span> <span class="o">-</span> <span class="n">limit</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">begin</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span>
<span class="k">elif</span> <span class="n">method</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;bfill&quot;</span><span class="p">,</span> <span class="s2">&quot;backfill&quot;</span><span class="p">]:</span>
<span class="n">func</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">first</span>
<span class="n">begin</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span> <span class="o">+</span> <span class="mi">1</span>
<span class="k">if</span> <span class="n">limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">end</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span> <span class="o">+</span> <span class="n">limit</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">end</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">unboundedFollowing</span>
<span class="n">window</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">begin</span><span class="p">,</span> <span class="n">end</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="n">func</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_spark_column</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">,</span> <span class="n">scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name_like_string</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">))</span> <span class="c1"># TODO: dtype?</span>
<span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<div class="viewcode-block" id="Series.interpolate"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.interpolate.html#pyspark.pandas.Series.interpolate">[docs]</a> <span class="k">def</span> <span class="nf">interpolate</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">method</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;linear&quot;</span><span class="p">,</span>
<span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">limit_direction</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">limit_area</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_interpolate</span><span class="p">(</span>
<span class="n">method</span><span class="o">=</span><span class="n">method</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="n">limit</span><span class="p">,</span> <span class="n">limit_direction</span><span class="o">=</span><span class="n">limit_direction</span><span class="p">,</span> <span class="n">limit_area</span><span class="o">=</span><span class="n">limit_area</span>
<span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_interpolate</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">method</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;linear&quot;</span><span class="p">,</span>
<span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">limit_direction</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">limit_area</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span> <span class="o">==</span> <span class="s2">&quot;object&quot;</span><span class="p">:</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;Series.interpolate with object dtype is deprecated and will raise in a &quot;</span>
<span class="s2">&quot;future version. Convert to a specific numeric type before interpolating.&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">method</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;linear&quot;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;interpolate currently works only for method=&#39;linear&#39;&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="p">(</span><span class="n">limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="ow">not</span> <span class="n">limit</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;limit must be &gt; 0.&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="p">(</span><span class="n">limit_direction</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span>
<span class="n">limit_direction</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;forward&quot;</span><span class="p">,</span> <span class="s2">&quot;backward&quot;</span><span class="p">,</span> <span class="s2">&quot;both&quot;</span><span class="p">]</span>
<span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;invalid limit_direction: &#39;</span><span class="si">{}</span><span class="s2">&#39;&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">limit_direction</span><span class="p">))</span>
<span class="k">if</span> <span class="p">(</span><span class="n">limit_area</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="n">limit_area</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;inside&quot;</span><span class="p">,</span> <span class="s2">&quot;outside&quot;</span><span class="p">]):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;invalid limit_area: &#39;</span><span class="si">{}</span><span class="s2">&#39;&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">limit_area</span><span class="p">))</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">nullable</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="p">(</span><span class="n">FloatType</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">)</span>
<span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">last_non_null</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">last</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
<span class="n">null_index</span> <span class="o">=</span> <span class="n">SF</span><span class="o">.</span><span class="n">null_index</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="n">Window</span> <span class="o">=</span> <span class="n">get_window_class</span><span class="p">()</span>
<span class="n">window_forward</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="n">last_non_null_forward</span> <span class="o">=</span> <span class="n">last_non_null</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window_forward</span><span class="p">)</span>
<span class="n">null_index_forward</span> <span class="o">=</span> <span class="n">null_index</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window_forward</span><span class="p">)</span>
<span class="n">window_backward</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">desc</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">))</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span>
<span class="p">)</span>
<span class="n">last_non_null_backward</span> <span class="o">=</span> <span class="n">last_non_null</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window_backward</span><span class="p">)</span>
<span class="n">null_index_backward</span> <span class="o">=</span> <span class="n">null_index</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window_backward</span><span class="p">)</span>
<span class="n">fill</span> <span class="o">=</span> <span class="p">(</span><span class="n">last_non_null_backward</span> <span class="o">-</span> <span class="n">last_non_null_forward</span><span class="p">)</span> <span class="o">/</span> <span class="p">(</span>
<span class="n">null_index_backward</span> <span class="o">+</span> <span class="n">null_index_forward</span>
<span class="p">)</span> <span class="o">*</span> <span class="n">null_index_forward</span> <span class="o">+</span> <span class="n">last_non_null_forward</span>
<span class="n">fill_cond</span> <span class="o">=</span> <span class="o">~</span><span class="n">F</span><span class="o">.</span><span class="n">isnull</span><span class="p">(</span><span class="n">last_non_null_backward</span><span class="p">)</span> <span class="o">&amp;</span> <span class="o">~</span><span class="n">F</span><span class="o">.</span><span class="n">isnull</span><span class="p">(</span><span class="n">last_non_null_forward</span><span class="p">)</span>
<span class="n">pad_head</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
<span class="n">pad_head_cond</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="n">pad_tail</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span>
<span class="n">pad_tail_cond</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="c1"># inputs -&gt; NaN, NaN, 1.0, NaN, NaN, NaN, 5.0, NaN, NaN</span>
<span class="k">if</span> <span class="n">limit_direction</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">limit_direction</span> <span class="o">==</span> <span class="s2">&quot;forward&quot;</span><span class="p">:</span>
<span class="c1"># outputs -&gt; NaN, NaN, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 5.0</span>
<span class="n">pad_tail</span> <span class="o">=</span> <span class="n">last_non_null_forward</span>
<span class="n">pad_tail_cond</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">isnull</span><span class="p">(</span><span class="n">last_non_null_backward</span><span class="p">)</span> <span class="o">&amp;</span> <span class="o">~</span><span class="n">F</span><span class="o">.</span><span class="n">isnull</span><span class="p">(</span><span class="n">last_non_null_forward</span><span class="p">)</span>
<span class="k">if</span> <span class="n">limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># outputs (limit=1) -&gt; NaN, NaN, 1.0, 2.0, NaN, NaN, 5.0, 5.0, NaN</span>
<span class="n">fill_cond</span> <span class="o">=</span> <span class="n">fill_cond</span> <span class="o">&amp;</span> <span class="p">(</span><span class="n">null_index_forward</span> <span class="o">&lt;=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">limit</span><span class="p">))</span>
<span class="n">pad_tail_cond</span> <span class="o">=</span> <span class="n">pad_tail_cond</span> <span class="o">&amp;</span> <span class="p">(</span><span class="n">null_index_forward</span> <span class="o">&lt;=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">limit</span><span class="p">))</span>
<span class="k">elif</span> <span class="n">limit_direction</span> <span class="o">==</span> <span class="s2">&quot;backward&quot;</span><span class="p">:</span>
<span class="c1"># outputs -&gt; 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, NaN, NaN</span>
<span class="n">pad_head</span> <span class="o">=</span> <span class="n">last_non_null_backward</span>
<span class="n">pad_head_cond</span> <span class="o">=</span> <span class="o">~</span><span class="n">F</span><span class="o">.</span><span class="n">isnull</span><span class="p">(</span><span class="n">last_non_null_backward</span><span class="p">)</span> <span class="o">&amp;</span> <span class="n">F</span><span class="o">.</span><span class="n">isnull</span><span class="p">(</span><span class="n">last_non_null_forward</span><span class="p">)</span>
<span class="k">if</span> <span class="n">limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># outputs (limit=1) -&gt; NaN, 1.0, 1.0, NaN, NaN, 4.0, 5.0, NaN, NaN</span>
<span class="n">fill_cond</span> <span class="o">=</span> <span class="n">fill_cond</span> <span class="o">&amp;</span> <span class="p">(</span><span class="n">null_index_backward</span> <span class="o">&lt;=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">limit</span><span class="p">))</span>
<span class="n">pad_head_cond</span> <span class="o">=</span> <span class="n">pad_head_cond</span> <span class="o">&amp;</span> <span class="p">(</span><span class="n">null_index_backward</span> <span class="o">&lt;=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">limit</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># outputs -&gt; 1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 5.0</span>
<span class="n">pad_head</span> <span class="o">=</span> <span class="n">last_non_null_backward</span>
<span class="n">pad_head_cond</span> <span class="o">=</span> <span class="o">~</span><span class="n">F</span><span class="o">.</span><span class="n">isnull</span><span class="p">(</span><span class="n">last_non_null_backward</span><span class="p">)</span> <span class="o">&amp;</span> <span class="n">F</span><span class="o">.</span><span class="n">isnull</span><span class="p">(</span><span class="n">last_non_null_forward</span><span class="p">)</span>
<span class="n">pad_tail</span> <span class="o">=</span> <span class="n">last_non_null_forward</span>
<span class="n">pad_tail_cond</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">isnull</span><span class="p">(</span><span class="n">last_non_null_backward</span><span class="p">)</span> <span class="o">&amp;</span> <span class="o">~</span><span class="n">F</span><span class="o">.</span><span class="n">isnull</span><span class="p">(</span><span class="n">last_non_null_forward</span><span class="p">)</span>
<span class="k">if</span> <span class="n">limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># outputs (limit=1) -&gt; NaN, 1.0, 1.0, 2.0, NaN, 4.0, 5.0, 5.0, NaN</span>
<span class="n">fill_cond</span> <span class="o">=</span> <span class="n">fill_cond</span> <span class="o">&amp;</span> <span class="p">(</span>
<span class="p">(</span><span class="n">null_index_forward</span> <span class="o">&lt;=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">limit</span><span class="p">))</span> <span class="o">|</span> <span class="p">(</span><span class="n">null_index_backward</span> <span class="o">&lt;=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">limit</span><span class="p">))</span>
<span class="p">)</span>
<span class="n">pad_head_cond</span> <span class="o">=</span> <span class="n">pad_head_cond</span> <span class="o">&amp;</span> <span class="p">(</span><span class="n">null_index_backward</span> <span class="o">&lt;=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">limit</span><span class="p">))</span>
<span class="n">pad_tail_cond</span> <span class="o">=</span> <span class="n">pad_tail_cond</span> <span class="o">&amp;</span> <span class="p">(</span><span class="n">null_index_forward</span> <span class="o">&lt;=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">limit</span><span class="p">))</span>
<span class="k">if</span> <span class="n">limit_area</span> <span class="o">==</span> <span class="s2">&quot;inside&quot;</span><span class="p">:</span>
<span class="n">pad_head_cond</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="n">pad_tail_cond</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">limit_area</span> <span class="o">==</span> <span class="s2">&quot;outside&quot;</span><span class="p">:</span>
<span class="n">fill_cond</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">False</span><span class="p">)</span>
<span class="n">cond</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">isnull</span><span class="p">()</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">scol</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span> <span class="o">&amp;</span> <span class="n">fill_cond</span><span class="p">,</span> <span class="n">fill</span><span class="p">)</span>
<span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span> <span class="o">&amp;</span> <span class="n">pad_head_cond</span><span class="p">,</span> <span class="n">pad_head</span><span class="p">)</span>
<span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span> <span class="o">&amp;</span> <span class="n">pad_tail_cond</span><span class="p">,</span> <span class="n">pad_tail</span><span class="p">)</span>
<span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_spark_column</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">,</span> <span class="n">scol</span><span class="p">)</span> <span class="c1"># TODO: dtype?</span>
<span class="p">)</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<div class="viewcode-block" id="Series.dropna"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.dropna.html#pyspark.pandas.Series.dropna">[docs]</a> <span class="k">def</span> <span class="nf">dropna</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a new Series with missing values removed.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> axis : {0 or &#39;index&#39;}, default 0</span>
<span class="sd"> There is only one axis to drop values from.</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> If True, do operation inplace and return None.</span>
<span class="sd"> **kwargs</span>
<span class="sd"> Not in use.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with NA entries dropped from it.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; ser = ps.Series([1., 2., np.nan])</span>
<span class="sd"> &gt;&gt;&gt; ser</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Drop NA values from a Series.</span>
<span class="sd"> &gt;&gt;&gt; ser.dropna()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Keep the Series with valid entries in the same variable.</span>
<span class="sd"> &gt;&gt;&gt; ser.dropna(inplace=True)</span>
<span class="sd"> &gt;&gt;&gt; ser</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">inplace</span> <span class="o">=</span> <span class="n">validate_bool_kwarg</span><span class="p">(</span><span class="n">inplace</span><span class="p">,</span> <span class="s2">&quot;inplace&quot;</span><span class="p">)</span>
<span class="c1"># TODO: last two examples from pandas produce different results.</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]]</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.clip"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.clip.html#pyspark.pandas.Series.clip">[docs]</a> <span class="k">def</span> <span class="nf">clip</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">lower</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">upper</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Trim values at input threshold(s).</span>
<span class="sd"> Assigns values outside the boundaries to the boundary values.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> lower : float or int, default None</span>
<span class="sd"> Minimum threshold value. All values below this threshold will be set to it.</span>
<span class="sd"> upper : float or int, default None</span>
<span class="sd"> Maximum threshold value. All values above this threshold will be set to it.</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> If True, perform the operation in-place.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with the values outside the clip boundaries replaced.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([0, 2, 4])</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; psser.clip(1, 3)</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Clip can be performed in-place.</span>
<span class="sd"> &gt;&gt;&gt; psser.clip(2, 3, inplace=True)</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 0 2</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> One difference between this implementation and pandas is that running</span>
<span class="sd"> `pd.Series([&#39;a&#39;, &#39;b&#39;]).clip(0, 1)` will crash with &quot;TypeError: &#39;&lt;=&#39; not supported between</span>
<span class="sd"> instances of &#39;str&#39; and &#39;int&#39;&quot; while `ps.Series([&#39;a&#39;, &#39;b&#39;]).clip(0, 1)` will output the</span>
<span class="sd"> original Series, simply ignoring the incompatible types.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">is_list_like</span><span class="p">(</span><span class="n">lower</span><span class="p">)</span> <span class="ow">or</span> <span class="n">is_list_like</span><span class="p">(</span><span class="n">upper</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;List-like value are not supported for &#39;lower&#39; and &#39;upper&#39; at the &quot;</span> <span class="o">+</span> <span class="s2">&quot;moment&quot;</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">lower</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">upper</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="k">if</span> <span class="n">lower</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">scol</span> <span class="o">&lt;</span> <span class="n">lower</span><span class="p">,</span> <span class="n">lower</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="k">if</span> <span class="n">upper</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">scol</span> <span class="o">&gt;</span> <span class="n">upper</span><span class="p">,</span> <span class="n">upper</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]],</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_update_internal_frame</span><span class="p">(</span><span class="n">internal</span><span class="p">,</span> <span class="n">check_same_anchor</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span>
<span class="n">scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]),</span>
<span class="n">field</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="Series.drop"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.drop.html#pyspark.pandas.Series.drop">[docs]</a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">labels</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Name</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">index</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Name</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">columns</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Name</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">level</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return Series with specified index labels removed.</span>
<span class="sd"> Remove elements of a Series based on specifying the index labels.</span>
<span class="sd"> When using a multi-index, labels on different levels can be removed by specifying the level.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> labels : single label or list-like</span>
<span class="sd"> Index labels to drop.</span>
<span class="sd"> index : single label or list-like</span>
<span class="sd"> Redundant for application on Series, but index can be used instead of labels.</span>
<span class="sd"> columns : single label or list-like</span>
<span class="sd"> No change is made to the Series; use &#39;index&#39; or &#39;labels&#39; instead.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> level : int or level name, optional</span>
<span class="sd"> For MultiIndex, level for which the labels will be removed.</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> If True, do operation inplace and return None.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with specified index labels removed.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.dropna</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data=np.arange(3), index=[&#39;A&#39;, &#39;B&#39;, &#39;C&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> A 0</span>
<span class="sd"> B 1</span>
<span class="sd"> C 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Drop single label A</span>
<span class="sd"> &gt;&gt;&gt; s.drop(&#39;A&#39;)</span>
<span class="sd"> B 1</span>
<span class="sd"> C 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Drop labels B and C</span>
<span class="sd"> &gt;&gt;&gt; s.drop(labels=[&#39;B&#39;, &#39;C&#39;])</span>
<span class="sd"> A 0</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Using &#39;index&#39; rather than &#39;labels&#39; returns exactly the same result.</span>
<span class="sd"> &gt;&gt;&gt; s.drop(index=&#39;A&#39;)</span>
<span class="sd"> B 1</span>
<span class="sd"> C 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.drop(index=[&#39;B&#39;, &#39;C&#39;])</span>
<span class="sd"> A 0</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> With &#39;columns&#39;, no change is made to the Series.</span>
<span class="sd"> &gt;&gt;&gt; s.drop(columns=[&#39;A&#39;])</span>
<span class="sd"> A 0</span>
<span class="sd"> B 1</span>
<span class="sd"> C 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> With &#39;inplace=True&#39;, do operation inplace and return None.</span>
<span class="sd"> &gt;&gt;&gt; s.drop(index=[&#39;B&#39;, &#39;C&#39;], inplace=True)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> A 0</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> MultiIndex is also supported.</span>
<span class="sd"> &gt;&gt;&gt; midx = pd.MultiIndex([[&#39;lama&#39;, &#39;cow&#39;, &#39;falcon&#39;],</span>
<span class="sd"> ... [&#39;speed&#39;, &#39;weight&#39;, &#39;length&#39;]],</span>
<span class="sd"> ... [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd"> ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],</span>
<span class="sd"> ... index=midx)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> lama speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.drop(labels=&#39;weight&#39;, level=1)</span>
<span class="sd"> lama speed 45.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.drop((&#39;lama&#39;, &#39;weight&#39;))</span>
<span class="sd"> lama speed 45.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.drop([(&#39;lama&#39;, &#39;speed&#39;), (&#39;falcon&#39;, &#39;weight&#39;)])</span>
<span class="sd"> lama weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">dropped</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_drop</span><span class="p">(</span>
<span class="n">labels</span><span class="o">=</span><span class="n">labels</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="n">level</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="n">inplace</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="n">columns</span>
<span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span> <span class="k">if</span> <span class="n">dropped</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">first_series</span><span class="p">(</span><span class="n">dropped</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_drop</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">labels</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Name</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">index</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Name</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">level</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">columns</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Name</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="n">DataFrame</span><span class="p">]:</span>
<span class="k">if</span> <span class="n">labels</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="n">columns</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">index</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Cannot specify both &#39;labels&#39; and &#39;index&#39;/&#39;columns&#39;&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_drop</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="n">labels</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="n">level</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="n">inplace</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="n">columns</span><span class="p">)</span>
<span class="k">if</span> <span class="n">index</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span>
<span class="k">if</span> <span class="n">level</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">level</span> <span class="o">=</span> <span class="mi">0</span>
<span class="k">if</span> <span class="n">level</span> <span class="o">&gt;=</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_level</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;&#39;level&#39; should be less than the number of indexes&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">is_name_like_tuple</span><span class="p">(</span><span class="n">index</span><span class="p">):</span>
<span class="n">index_list</span> <span class="o">=</span> <span class="p">[</span><span class="n">cast</span><span class="p">(</span><span class="n">Label</span><span class="p">,</span> <span class="n">index</span><span class="p">)]</span>
<span class="k">elif</span> <span class="n">is_name_like_value</span><span class="p">(</span><span class="n">index</span><span class="p">):</span>
<span class="n">index_list</span> <span class="o">=</span> <span class="p">[(</span><span class="n">index</span><span class="p">,)]</span>
<span class="k">elif</span> <span class="nb">all</span><span class="p">(</span><span class="n">is_name_like_value</span><span class="p">(</span><span class="n">idxes</span><span class="p">,</span> <span class="n">allow_tuple</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="k">for</span> <span class="n">idxes</span> <span class="ow">in</span> <span class="n">index</span><span class="p">):</span>
<span class="n">index_list</span> <span class="o">=</span> <span class="p">[(</span><span class="n">idex</span><span class="p">,)</span> <span class="k">for</span> <span class="n">idex</span> <span class="ow">in</span> <span class="n">index</span><span class="p">]</span>
<span class="k">elif</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="n">is_name_like_tuple</span><span class="p">(</span><span class="n">idxes</span><span class="p">)</span> <span class="k">for</span> <span class="n">idxes</span> <span class="ow">in</span> <span class="n">index</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;If the given index is a list, it &quot;</span>
<span class="s2">&quot;should only contains names as all tuples or all non tuples &quot;</span>
<span class="s2">&quot;that contain index names&quot;</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">index_list</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">List</span><span class="p">[</span><span class="n">Label</span><span class="p">],</span> <span class="n">index</span><span class="p">)</span>
<span class="n">drop_index_scols</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">idxes</span> <span class="ow">in</span> <span class="n">index_list</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">index_scols</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">[</span><span class="n">lvl</span><span class="p">]</span> <span class="o">==</span> <span class="n">idx</span>
<span class="k">for</span> <span class="n">lvl</span><span class="p">,</span> <span class="n">idx</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">idxes</span><span class="p">,</span> <span class="n">level</span><span class="p">)</span>
<span class="p">]</span>
<span class="k">except</span> <span class="ne">IndexError</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span>
<span class="s2">&quot;Key length (</span><span class="si">{}</span><span class="s2">) exceeds index depth (</span><span class="si">{}</span><span class="s2">)&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">internal</span><span class="o">.</span><span class="n">index_level</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">idxes</span><span class="p">)</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="n">drop_index_scols</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">index_scols</span><span class="p">))</span>
<span class="n">cond</span> <span class="o">=</span> <span class="o">~</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">|</span> <span class="n">y</span><span class="p">,</span> <span class="n">drop_index_scols</span><span class="p">)</span>
<span class="n">dropped_internal</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">with_filter</span><span class="p">(</span><span class="n">cond</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">dropped_internal</span><span class="p">))</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">dropped_internal</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">columns</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Need to specify at least one of &#39;labels&#39;, &#39;index&#39; or &#39;columns&#39;&quot;</span><span class="p">)</span>
<div class="viewcode-block" id="Series.head"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.head.html#pyspark.pandas.Series.head">[docs]</a> <span class="k">def</span> <span class="nf">head</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the first n rows.</span>
<span class="sd"> This function returns the first n rows for the object based on position.</span>
<span class="sd"> It is useful for quickly testing if your object has the right type of data in it.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> n : Integer, default = 5</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> The first n rows of the caller object.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;animal&#39;:[&#39;alligator&#39;, &#39;bee&#39;, &#39;falcon&#39;, &#39;lion&#39;]})</span>
<span class="sd"> &gt;&gt;&gt; df.animal.head(2) # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> 0 alligator</span>
<span class="sd"> 1 bee</span>
<span class="sd"> Name: animal, dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="n">n</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.last"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.last.html#pyspark.pandas.Series.last">[docs]</a> <span class="k">def</span> <span class="nf">last</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">offset</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">DateOffset</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Select final periods of time series data based on a date offset.</span>
<span class="sd"> When a Series has dates as its index, this function can</span>
<span class="sd"> select the last few elements based on a date offset.</span>
<span class="sd"> .. deprecated:: 4.0.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> offset : str or DateOffset</span>
<span class="sd"> The offset length of the data that will be selected. For instance,</span>
<span class="sd"> &#39;3D&#39; will display all the rows having their index within the last 3 days.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> A subset of the caller.</span>
<span class="sd"> Raises</span>
<span class="sd"> ------</span>
<span class="sd"> TypeError</span>
<span class="sd"> If the index is not a :class:`DatetimeIndex`</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; index = pd.date_range(&#39;2018-04-09&#39;, periods=4, freq=&#39;2D&#39;)</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([1, 2, 3, 4], index=index)</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 2018-04-09 1</span>
<span class="sd"> 2018-04-11 2</span>
<span class="sd"> 2018-04-13 3</span>
<span class="sd"> 2018-04-15 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Get the rows for the last 3 days:</span>
<span class="sd"> &gt;&gt;&gt; psser.last(&#39;3D&#39;)</span>
<span class="sd"> 2018-04-13 3</span>
<span class="sd"> 2018-04-15 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Notice that the data for the last 3 calendar days were returned, not the last</span>
<span class="sd"> 3 observed days in the dataset, and therefore data for 2018-04-11 was</span>
<span class="sd"> not returned.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;last is deprecated and will be removed in a future version. &quot;</span>
<span class="s2">&quot;Please create a mask and filter using `.loc` instead&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">last</span><span class="p">(</span><span class="n">offset</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.first"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.first.html#pyspark.pandas.Series.first">[docs]</a> <span class="k">def</span> <span class="nf">first</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">offset</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">DateOffset</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Select first periods of time series data based on a date offset.</span>
<span class="sd"> When a Series has dates as its index, this function can</span>
<span class="sd"> select the first few elements based on a date offset.</span>
<span class="sd"> .. deprecated:: 4.0.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> offset : str or DateOffset</span>
<span class="sd"> The offset length of the data that will be selected. For instance,</span>
<span class="sd"> &#39;3D&#39; will display all the rows having their index within the first 3 days.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> A subset of the caller.</span>
<span class="sd"> Raises</span>
<span class="sd"> ------</span>
<span class="sd"> TypeError</span>
<span class="sd"> If the index is not a :class:`DatetimeIndex`</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; index = pd.date_range(&#39;2018-04-09&#39;, periods=4, freq=&#39;2D&#39;)</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([1, 2, 3, 4], index=index)</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 2018-04-09 1</span>
<span class="sd"> 2018-04-11 2</span>
<span class="sd"> 2018-04-13 3</span>
<span class="sd"> 2018-04-15 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Get the rows for the first 3 days:</span>
<span class="sd"> &gt;&gt;&gt; psser.first(&#39;3D&#39;)</span>
<span class="sd"> 2018-04-09 1</span>
<span class="sd"> 2018-04-11 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Notice that the data for the first 3 calendar days were returned, not the first</span>
<span class="sd"> 3 observed days in the dataset, and therefore data for 2018-04-13 was</span>
<span class="sd"> not returned.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;first is deprecated and will be removed in a future version. &quot;</span>
<span class="s2">&quot;Please create a mask and filter using `.loc` instead&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">first</span><span class="p">(</span><span class="n">offset</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<span class="c1"># TODO: Categorical type isn&#39;t supported (due to PySpark&#39;s limitation) and</span>
<span class="c1"># some doctests related to timestamps were not added.</span>
<div class="viewcode-block" id="Series.unique"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.unique.html#pyspark.pandas.Series.unique">[docs]</a> <span class="k">def</span> <span class="nf">unique</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return unique values of Series object.</span>
<span class="sd"> Uniques are returned in order of appearance. This is a hash table-based unique</span>
<span class="sd"> and therefore does NOT sort the values.</span>
<span class="sd"> .. note:: This method returns a newly created Series whereas pandas returns</span>
<span class="sd"> the unique values as a NumPy array.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Returns the unique values as a Series.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Index.unique</span>
<span class="sd"> groupby.SeriesGroupBy.unique</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([2, 1, 3, 3], name=&#39;A&#39;)</span>
<span class="sd"> &gt;&gt;&gt; psser.unique().sort_values()</span>
<span class="sd"> 1 1</span>
<span class="sd"> 0 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> Name: A, dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; ps.Series([pd.Timestamp(&#39;2016-01-01&#39;) for _ in range(3)]).unique()</span>
<span class="sd"> 0 2016-01-01</span>
<span class="sd"> dtype: datetime64[ns]</span>
<span class="sd"> &gt;&gt;&gt; psser.name = (&#39;x&#39;, &#39;a&#39;)</span>
<span class="sd"> &gt;&gt;&gt; psser.unique().sort_values()</span>
<span class="sd"> 1 1</span>
<span class="sd"> 0 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> Name: (x, a), dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="n">column_labels</span><span class="o">=</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">],</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]],</span>
<span class="n">column_label_names</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_label_names</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span></div>
<div class="viewcode-block" id="Series.sort_values"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.sort_values.html#pyspark.pandas.Series.sort_values">[docs]</a> <span class="k">def</span> <span class="nf">sort_values</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">ascending</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">na_position</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;last&quot;</span><span class="p">,</span>
<span class="n">ignore_index</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sort by the values.</span>
<span class="sd"> Sort a Series in ascending or descending order by some criterion.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> ascending : bool or list of bool, default True</span>
<span class="sd"> Sort ascending vs. descending. Specify list for multiple sort</span>
<span class="sd"> orders. If this is a list of bools, must match the length of</span>
<span class="sd"> the by.</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> If True, perform the operation in-place.</span>
<span class="sd"> na_position : {&#39;first&#39;, &#39;last&#39;}, default &#39;last&#39;</span>
<span class="sd"> `first` puts NaNs at the beginning, `last` puts NaNs at the end</span>
<span class="sd"> ignore_index : bool, default False</span>
<span class="sd"> If True, the resulting axis will be labeled 0, 1, …, n - 1.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> sorted_obj : Series ordered by values.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([np.nan, 1, 3, 10, 5])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> 4 5.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Sort values in ascending order (default behaviour)</span>
<span class="sd"> &gt;&gt;&gt; s.sort_values(ascending=True)</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 4 5.0</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Sort values in descending order</span>
<span class="sd"> &gt;&gt;&gt; s.sort_values(ascending=False)</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> 4 5.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Sort values in descending order, ignoring the index</span>
<span class="sd"> &gt;&gt;&gt; s.sort_values(ascending=False, ignore_index=True)</span>
<span class="sd"> 0 10.0</span>
<span class="sd"> 1 5.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 1.0</span>
<span class="sd"> 4 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Sort values inplace</span>
<span class="sd"> &gt;&gt;&gt; s.sort_values(ascending=False, inplace=True)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> 4 5.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Sort values putting NAs first</span>
<span class="sd"> &gt;&gt;&gt; s.sort_values(na_position=&#39;first&#39;)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 4 5.0</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Sort a series of strings</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;z&#39;, &#39;b&#39;, &#39;d&#39;, &#39;a&#39;, &#39;c&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 z</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 d</span>
<span class="sd"> 3 a</span>
<span class="sd"> 4 c</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; s.sort_values()</span>
<span class="sd"> 3 a</span>
<span class="sd"> 1 b</span>
<span class="sd"> 4 c</span>
<span class="sd"> 2 d</span>
<span class="sd"> 0 z</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">inplace</span> <span class="o">=</span> <span class="n">validate_bool_kwarg</span><span class="p">(</span><span class="n">inplace</span><span class="p">,</span> <span class="s2">&quot;inplace&quot;</span><span class="p">)</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]]</span><span class="o">.</span><span class="n">_sort</span><span class="p">(</span>
<span class="n">by</span><span class="o">=</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">],</span> <span class="n">ascending</span><span class="o">=</span><span class="n">ascending</span><span class="p">,</span> <span class="n">na_position</span><span class="o">=</span><span class="n">na_position</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="k">if</span> <span class="n">ignore_index</span><span class="p">:</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">drop</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="n">inplace</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">drop</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span> <span class="k">if</span> <span class="n">ignore_index</span> <span class="k">else</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.sort_index"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.sort_index.html#pyspark.pandas.Series.sort_index">[docs]</a> <span class="k">def</span> <span class="nf">sort_index</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="n">level</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ascending</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">kind</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">na_position</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;last&quot;</span><span class="p">,</span>
<span class="n">ignore_index</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sort object by labels (along an axis)</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> axis : index, columns to direct sorting. Currently, only axis = 0 is supported.</span>
<span class="sd"> level : int or level name or list of ints or list of level names</span>
<span class="sd"> if not None, sort on values in specified index level(s)</span>
<span class="sd"> ascending : boolean, default True</span>
<span class="sd"> Sort ascending vs. descending</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> if True, perform operation in-place</span>
<span class="sd"> kind : str, default None</span>
<span class="sd"> pandas-on-Spark does not allow specifying the sorting algorithm now,</span>
<span class="sd"> default None</span>
<span class="sd"> na_position : {&#39;first&#39;, &#39;last&#39;}, default &#39;last&#39;</span>
<span class="sd"> first puts NaNs at the beginning, last puts NaNs at the end. Not implemented for</span>
<span class="sd"> MultiIndex.</span>
<span class="sd"> ignore_index : bool, default False</span>
<span class="sd"> If True, the resulting axis will be labeled 0, 1, …, n - 1.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> sorted_obj : Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 1, np.nan], index=[&#39;b&#39;, &#39;a&#39;, np.nan])</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index() # doctest: +SKIP</span>
<span class="sd"> a 1.0</span>
<span class="sd"> b 2.0</span>
<span class="sd"> None NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index(ignore_index=True)</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index(ascending=False) # doctest: +SKIP</span>
<span class="sd"> b 2.0</span>
<span class="sd"> a 1.0</span>
<span class="sd"> None NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index(na_position=&#39;first&#39;) # doctest: +SKIP</span>
<span class="sd"> None NaN</span>
<span class="sd"> a 1.0</span>
<span class="sd"> b 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index(inplace=True)</span>
<span class="sd"> &gt;&gt;&gt; s # doctest: +SKIP</span>
<span class="sd"> a 1.0</span>
<span class="sd"> b 2.0</span>
<span class="sd"> None NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Multi-index series.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(range(4), index=[[&#39;b&#39;, &#39;b&#39;, &#39;a&#39;, &#39;a&#39;], [1, 0, 1, 0]], name=&#39;0&#39;)</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> a 0 3</span>
<span class="sd"> 1 2</span>
<span class="sd"> b 0 1</span>
<span class="sd"> 1 0</span>
<span class="sd"> Name: 0, dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index(level=1) # doctest: +SKIP</span>
<span class="sd"> a 0 3</span>
<span class="sd"> b 0 1</span>
<span class="sd"> a 1 2</span>
<span class="sd"> b 1 0</span>
<span class="sd"> Name: 0, dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index(level=[1, 0])</span>
<span class="sd"> a 0 3</span>
<span class="sd"> b 0 1</span>
<span class="sd"> a 1 2</span>
<span class="sd"> b 1 0</span>
<span class="sd"> Name: 0, dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">inplace</span> <span class="o">=</span> <span class="n">validate_bool_kwarg</span><span class="p">(</span><span class="n">inplace</span><span class="p">,</span> <span class="s2">&quot;inplace&quot;</span><span class="p">)</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]]</span><span class="o">.</span><span class="n">sort_index</span><span class="p">(</span>
<span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="n">level</span><span class="p">,</span> <span class="n">ascending</span><span class="o">=</span><span class="n">ascending</span><span class="p">,</span> <span class="n">kind</span><span class="o">=</span><span class="n">kind</span><span class="p">,</span> <span class="n">na_position</span><span class="o">=</span><span class="n">na_position</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="k">if</span> <span class="n">ignore_index</span><span class="p">:</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">drop</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="n">inplace</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">drop</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span> <span class="k">if</span> <span class="n">ignore_index</span> <span class="k">else</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.swaplevel"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.swaplevel.html#pyspark.pandas.Series.swaplevel">[docs]</a> <span class="k">def</span> <span class="nf">swaplevel</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">i</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Name</span><span class="p">]</span> <span class="o">=</span> <span class="o">-</span><span class="mi">2</span><span class="p">,</span> <span class="n">j</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Name</span><span class="p">]</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">copy</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Swap levels i and j in a MultiIndex.</span>
<span class="sd"> Default is to swap the two innermost levels of the index.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> i, j : int, str</span>
<span class="sd"> Level of the indices to be swapped. Can pass level name as string.</span>
<span class="sd"> copy : bool, default True</span>
<span class="sd"> Whether to copy underlying data. Must be True.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with levels swapped in MultiIndex.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; midx = pd.MultiIndex.from_arrays([[&#39;a&#39;, &#39;b&#39;], [1, 2]], names = [&#39;word&#39;, &#39;number&#39;])</span>
<span class="sd"> &gt;&gt;&gt; midx # doctest: +SKIP</span>
<span class="sd"> MultiIndex([(&#39;a&#39;, 1),</span>
<span class="sd"> (&#39;b&#39;, 2)],</span>
<span class="sd"> names=[&#39;word&#39;, &#39;number&#39;])</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([&#39;x&#39;, &#39;y&#39;], index=midx)</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> word number</span>
<span class="sd"> a 1 x</span>
<span class="sd"> b 2 y</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; psser.swaplevel()</span>
<span class="sd"> number word</span>
<span class="sd"> 1 a x</span>
<span class="sd"> 2 b y</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; psser.swaplevel(0, 1)</span>
<span class="sd"> number word</span>
<span class="sd"> 1 a x</span>
<span class="sd"> 2 b y</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; psser.swaplevel(&#39;number&#39;, &#39;word&#39;)</span>
<span class="sd"> number word</span>
<span class="sd"> 1 a x</span>
<span class="sd"> 2 b y</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">copy</span> <span class="ow">is</span> <span class="kc">True</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">swaplevel</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.swapaxes"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.swapaxes.html#pyspark.pandas.Series.swapaxes">[docs]</a> <span class="k">def</span> <span class="nf">swapaxes</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">i</span><span class="p">:</span> <span class="n">Axis</span><span class="p">,</span> <span class="n">j</span><span class="p">:</span> <span class="n">Axis</span><span class="p">,</span> <span class="n">copy</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Interchange axes and swap values axes appropriately.</span>
<span class="sd"> .. deprecated:: 4.0.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> i: {0 or &#39;index&#39;, 1 or &#39;columns&#39;}. The axis to swap.</span>
<span class="sd"> j: {0 or &#39;index&#39;, 1 or &#39;columns&#39;}. The axis to swap.</span>
<span class="sd"> copy : bool, default True.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([1, 2, 3], index=[&quot;x&quot;, &quot;y&quot;, &quot;z&quot;])</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> x 1</span>
<span class="sd"> y 2</span>
<span class="sd"> z 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt;</span>
<span class="sd"> &gt;&gt;&gt; psser.swapaxes(0, 0)</span>
<span class="sd"> x 1</span>
<span class="sd"> y 2</span>
<span class="sd"> z 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;&#39;Series.swapaxes&#39; is deprecated and will be removed in a future version. &quot;</span>
<span class="s2">&quot;Please use &#39;Series.transpose&#39; instead.&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">assert</span> <span class="n">copy</span> <span class="ow">is</span> <span class="kc">True</span>
<span class="n">i</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">i</span><span class="p">)</span>
<span class="n">j</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">j</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">i</span> <span class="o">==</span> <span class="n">j</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Axis must be 0 for Series&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span></div>
<div class="viewcode-block" id="Series.add_prefix"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.add_prefix.html#pyspark.pandas.Series.add_prefix">[docs]</a> <span class="k">def</span> <span class="nf">add_prefix</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">prefix</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Prefix labels with string `prefix`.</span>
<span class="sd"> For Series, the row labels are prefixed.</span>
<span class="sd"> For DataFrame, the column labels are prefixed.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> prefix : str</span>
<span class="sd"> The string to add before each label.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> New Series with updated labels.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.add_suffix: Suffix row labels with string `suffix`.</span>
<span class="sd"> DataFrame.add_suffix: Suffix column labels with string `suffix`.</span>
<span class="sd"> DataFrame.add_prefix: Prefix column labels with string `prefix`.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.add_prefix(&#39;item_&#39;)</span>
<span class="sd"> item_0 1</span>
<span class="sd"> item_1 2</span>
<span class="sd"> item_2 3</span>
<span class="sd"> item_3 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">prefix</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="p">[</span>
<span class="n">F</span><span class="o">.</span><span class="n">concat</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">prefix</span><span class="p">),</span> <span class="n">index_spark_column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">index_spark_column_name</span><span class="p">)</span>
<span class="k">for</span> <span class="n">index_spark_column</span><span class="p">,</span> <span class="n">index_spark_column_name</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span>
<span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">,</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_column_names</span>
<span class="p">)</span>
<span class="p">]</span>
<span class="o">+</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_spark_columns</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span>
<span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="o">.</span><span class="n">with_new_sdf</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">index_fields</span><span class="o">=</span><span class="p">([</span><span class="kc">None</span><span class="p">]</span> <span class="o">*</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_level</span><span class="p">)))</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="Series.add_suffix"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.add_suffix.html#pyspark.pandas.Series.add_suffix">[docs]</a> <span class="k">def</span> <span class="nf">add_suffix</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">suffix</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Suffix labels with string `suffix`.</span>
<span class="sd"> For Series, the row labels are suffixed.</span>
<span class="sd"> For DataFrame, the column labels are suffixed.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> suffix : str</span>
<span class="sd"> The string to add after each label.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> New Series with updated labels.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.add_prefix: Prefix row labels with string `prefix`.</span>
<span class="sd"> DataFrame.add_prefix: Prefix column labels with string `prefix`.</span>
<span class="sd"> DataFrame.add_suffix: Suffix column labels with string `suffix`.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.add_suffix(&#39;_item&#39;)</span>
<span class="sd"> 0_item 1</span>
<span class="sd"> 1_item 2</span>
<span class="sd"> 2_item 3</span>
<span class="sd"> 3_item 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">suffix</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="p">[</span>
<span class="n">F</span><span class="o">.</span><span class="n">concat</span><span class="p">(</span><span class="n">index_spark_column</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">suffix</span><span class="p">))</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">index_spark_column_name</span><span class="p">)</span>
<span class="k">for</span> <span class="n">index_spark_column</span><span class="p">,</span> <span class="n">index_spark_column_name</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span>
<span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">,</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_column_names</span>
<span class="p">)</span>
<span class="p">]</span>
<span class="o">+</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_spark_columns</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span>
<span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="o">.</span><span class="n">with_new_sdf</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">index_fields</span><span class="o">=</span><span class="p">([</span><span class="kc">None</span><span class="p">]</span> <span class="o">*</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_level</span><span class="p">)))</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="Series.autocorr"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.autocorr.html#pyspark.pandas.Series.autocorr">[docs]</a> <span class="k">def</span> <span class="nf">autocorr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">lag</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compute the lag-N autocorrelation.</span>
<span class="sd"> This method computes the Pearson correlation between</span>
<span class="sd"> the Series and its shifted self.</span>
<span class="sd"> .. note:: the current implementation of autocorr uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to moving all data into</span>
<span class="sd"> a single partition in a single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method with very large datasets.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> lag : int, default 1</span>
<span class="sd"> Number of lags to apply before performing autocorrelation.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> float</span>
<span class="sd"> The Pearson correlation between self and self.shift(lag).</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.corr : Compute the correlation between two Series.</span>
<span class="sd"> Series.shift : Shift index by desired number of periods.</span>
<span class="sd"> DataFrame.corr : Compute pairwise correlation of columns.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> If the Pearson correlation is not well defined, &#39;NaN&#39; is returned.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([.2, .0, .6, .2, np.nan, .5, .6])</span>
<span class="sd"> &gt;&gt;&gt; s.autocorr() # doctest: +ELLIPSIS</span>
<span class="sd"> -0.141219...</span>
<span class="sd"> &gt;&gt;&gt; s.autocorr(0) # doctest: +ELLIPSIS</span>
<span class="sd"> 1.0...</span>
<span class="sd"> &gt;&gt;&gt; s.autocorr(2) # doctest: +ELLIPSIS</span>
<span class="sd"> 0.970725...</span>
<span class="sd"> &gt;&gt;&gt; s.autocorr(-3) # doctest: +ELLIPSIS</span>
<span class="sd"> 0.277350...</span>
<span class="sd"> &gt;&gt;&gt; s.autocorr(5) # doctest: +ELLIPSIS</span>
<span class="sd"> -1.000000...</span>
<span class="sd"> &gt;&gt;&gt; s.autocorr(6) # doctest: +ELLIPSIS</span>
<span class="sd"> nan</span>
<span class="sd"> If the Pearson correlation is not well defined, then &#39;NaN&#39; is returned.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 0, 0, 0])</span>
<span class="sd"> &gt;&gt;&gt; s.autocorr()</span>
<span class="sd"> nan</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># This implementation is suboptimal because it moves all data to a single partition,</span>
<span class="c1"># global sort should be used instead of window, but it should be a start</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">lag</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;lag should be an int; however, got [</span><span class="si">%s</span><span class="s2">]&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">lag</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="k">if</span> <span class="n">lag</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">corr</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">corr</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">scol</span><span class="p">))</span><span class="o">.</span><span class="n">head</span><span class="p">()[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">lag_scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lag</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">lag</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">))</span>
<span class="n">lag_col_name</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="s2">&quot;__autocorr_lag_tmp_col__&quot;</span><span class="p">)</span>
<span class="n">corr</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">sdf</span><span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="n">lag_col_name</span><span class="p">,</span> <span class="n">lag_scol</span><span class="p">)</span>
<span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">corr</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">lag_col_name</span><span class="p">)))</span>
<span class="o">.</span><span class="n">head</span><span class="p">()[</span><span class="mi">0</span><span class="p">]</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span> <span class="k">if</span> <span class="n">corr</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">corr</span></div>
<div class="viewcode-block" id="Series.corr"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.corr.html#pyspark.pandas.Series.corr">[docs]</a> <span class="k">def</span> <span class="nf">corr</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">method</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;pearson&quot;</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compute correlation with `other` Series, excluding missing values.</span>
<span class="sd"> .. versionadded:: 3.3.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series</span>
<span class="sd"> method : {&#39;pearson&#39;, &#39;spearman&#39;, &#39;kendall&#39;}</span>
<span class="sd"> * pearson : standard correlation coefficient</span>
<span class="sd"> * spearman : Spearman rank correlation</span>
<span class="sd"> * kendall : Kendall Tau correlation coefficient</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> support &#39;kendall&#39; for method parameter</span>
<span class="sd"> min_periods : int, optional</span>
<span class="sd"> Minimum number of observations needed to have a valid result.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> correlation : float</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> The complexity of Kendall correlation is O(#row * #row). If the dataset is too</span>
<span class="sd"> large, sampling ahead of correlation computation is recommended.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;s1&#39;: [.2, .0, .6, .2],</span>
<span class="sd"> ... &#39;s2&#39;: [.3, .6, .0, .1]})</span>
<span class="sd"> &gt;&gt;&gt; s1 = df.s1</span>
<span class="sd"> &gt;&gt;&gt; s2 = df.s2</span>
<span class="sd"> &gt;&gt;&gt; s1.corr(s2, method=&#39;pearson&#39;)</span>
<span class="sd"> -0.85106...</span>
<span class="sd"> &gt;&gt;&gt; s1.corr(s2, method=&#39;spearman&#39;)</span>
<span class="sd"> -0.94868...</span>
<span class="sd"> &gt;&gt;&gt; s1.corr(s2, method=&#39;kendall&#39;)</span>
<span class="sd"> -0.91287...</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([1, np.nan, 2, 1, 1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([3, 4, 1, 1, 5])</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.ops_on_diff_frames&quot;, True):</span>
<span class="sd"> ... s1.corr(s2, method=&quot;pearson&quot;)</span>
<span class="sd"> -0.52223...</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.ops_on_diff_frames&quot;, True):</span>
<span class="sd"> ... s1.corr(s2, method=&quot;spearman&quot;)</span>
<span class="sd"> -0.54433...</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.ops_on_diff_frames&quot;, True):</span>
<span class="sd"> ... s1.corr(s2, method=&quot;kendall&quot;)</span>
<span class="sd"> -0.51639...</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.ops_on_diff_frames&quot;, True):</span>
<span class="sd"> ... s1.corr(s2, method=&quot;kendall&quot;, min_periods=5)</span>
<span class="sd"> nan</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">method</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;pearson&quot;</span><span class="p">,</span> <span class="s2">&quot;spearman&quot;</span><span class="p">,</span> <span class="s2">&quot;kendall&quot;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Invalid method </span><span class="si">{</span><span class="n">method</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Series</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;&#39;other&#39; must be a Series&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">min_periods</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">min_periods</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Invalid min_periods type </span><span class="si">{</span><span class="nb">type</span><span class="p">(</span><span class="n">min_periods</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="n">min_periods</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">min_periods</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">min_periods</span>
<span class="k">if</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="n">combined</span> <span class="o">=</span> <span class="bp">self</span>
<span class="n">this</span> <span class="o">=</span> <span class="bp">self</span>
<span class="n">that</span> <span class="o">=</span> <span class="n">other</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">combine_frames</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">_psdf</span><span class="p">)</span> <span class="c1"># type: ignore[assignment]</span>
<span class="n">this</span> <span class="o">=</span> <span class="n">combined</span><span class="p">[</span><span class="s2">&quot;this&quot;</span><span class="p">]</span>
<span class="n">that</span> <span class="o">=</span> <span class="n">combined</span><span class="p">[</span><span class="s2">&quot;that&quot;</span><span class="p">]</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span>
<span class="n">index_col_name</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="s2">&quot;__ser_corr_index_temp_column__&quot;</span><span class="p">)</span>
<span class="n">this_scol</span> <span class="o">=</span> <span class="n">this</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_for</span><span class="p">(</span><span class="n">this</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="n">that_scol</span> <span class="o">=</span> <span class="n">that</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_for</span><span class="p">(</span><span class="n">that</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">index_col_name</span><span class="p">),</span>
<span class="n">this_scol</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">&quot;double&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">CORRELATION_VALUE_1_COLUMN</span><span class="p">),</span>
<span class="n">that_scol</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">&quot;double&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">CORRELATION_VALUE_2_COLUMN</span><span class="p">),</span>
<span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">compute</span><span class="p">(</span><span class="n">sdf</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span> <span class="n">groupKeys</span><span class="o">=</span><span class="p">[</span><span class="n">index_col_name</span><span class="p">],</span> <span class="n">method</span><span class="o">=</span><span class="n">method</span><span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">CORRELATION_COUNT_OUTPUT_COLUMN</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">min_periods</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="s2">&quot;double&quot;</span><span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">CORRELATION_CORR_OUTPUT_COLUMN</span><span class="p">))</span>
<span class="p">)</span>
<span class="n">results</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">take</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">results</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;attempt to get corr of an empty sequence&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span> <span class="k">if</span> <span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span></div>
<div class="viewcode-block" id="Series.nsmallest"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.nsmallest.html#pyspark.pandas.Series.nsmallest">[docs]</a> <span class="k">def</span> <span class="nf">nsmallest</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the smallest `n` elements.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> n : int, default 5</span>
<span class="sd"> Return this many ascending sorted values.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> The `n` smallest values in the Series, sorted in increasing order.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.nlargest: Get the `n` largest elements.</span>
<span class="sd"> Series.sort_values: Sort Series by values.</span>
<span class="sd"> Series.head: Return the first `n` rows.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Faster than ``.sort_values().head(n)`` for small `n` relative to</span>
<span class="sd"> the size of the ``Series`` object.</span>
<span class="sd"> In pandas-on-Spark, thanks to Spark&#39;s lazy execution and query optimizer,</span>
<span class="sd"> the two would have the same performance.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; data = [1, 2, 3, 4, np.nan ,6, 7, 8]</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 4 NaN</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> 6 7.0</span>
<span class="sd"> 7 8.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> The `n` smallest elements where ``n=5`` by default.</span>
<span class="sd"> &gt;&gt;&gt; s.nsmallest()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.nsmallest(3)</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">sort_values</span><span class="p">(</span><span class="n">ascending</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="n">n</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.nlargest"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.nlargest.html#pyspark.pandas.Series.nlargest">[docs]</a> <span class="k">def</span> <span class="nf">nlargest</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the largest `n` elements.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> n : int, default 5</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> The `n` largest values in the Series, sorted in decreasing order.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.nsmallest: Get the `n` smallest elements.</span>
<span class="sd"> Series.sort_values: Sort Series by values.</span>
<span class="sd"> Series.head: Return the first `n` rows.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Faster than ``.sort_values(ascending=False).head(n)`` for small `n`</span>
<span class="sd"> relative to the size of the ``Series`` object.</span>
<span class="sd"> In pandas-on-Spark, thanks to Spark&#39;s lazy execution and query optimizer,</span>
<span class="sd"> the two would have the same performance.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; data = [1, 2, 3, 4, np.nan ,6, 7, 8]</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 4 NaN</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> 6 7.0</span>
<span class="sd"> 7 8.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> The `n` largest elements where ``n=5`` by default.</span>
<span class="sd"> &gt;&gt;&gt; s.nlargest()</span>
<span class="sd"> 7 8.0</span>
<span class="sd"> 6 7.0</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.nlargest(n=3)</span>
<span class="sd"> 7 8.0</span>
<span class="sd"> 6 7.0</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">sort_values</span><span class="p">(</span><span class="n">ascending</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="n">n</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.sample"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.sample.html#pyspark.pandas.Series.sample">[docs]</a> <span class="k">def</span> <span class="nf">sample</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">n</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">frac</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">replace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">random_state</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ignore_index</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span>
<span class="n">n</span><span class="o">=</span><span class="n">n</span><span class="p">,</span>
<span class="n">frac</span><span class="o">=</span><span class="n">frac</span><span class="p">,</span>
<span class="n">replace</span><span class="o">=</span><span class="n">replace</span><span class="p">,</span>
<span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">,</span>
<span class="n">ignore_index</span><span class="o">=</span><span class="n">ignore_index</span><span class="p">,</span>
<span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<span class="n">sample</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="o">.</span><span class="n">sample</span><span class="o">.</span><span class="vm">__doc__</span>
<div class="viewcode-block" id="Series.hist"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.hist.html#pyspark.pandas.Series.hist">[docs]</a> <span class="nd">@no_type_check</span>
<span class="k">def</span> <span class="nf">hist</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="o">**</span><span class="n">kwds</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">plot</span><span class="o">.</span><span class="n">hist</span><span class="p">(</span><span class="n">bins</span><span class="p">,</span> <span class="o">**</span><span class="n">kwds</span><span class="p">)</span></div>
<span class="n">hist</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">PandasOnSparkPlotAccessor</span><span class="o">.</span><span class="n">hist</span><span class="o">.</span><span class="vm">__doc__</span>
<div class="viewcode-block" id="Series.apply"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.apply.html#pyspark.pandas.Series.apply">[docs]</a> <span class="k">def</span> <span class="nf">apply</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">args</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">(),</span> <span class="o">**</span><span class="n">kwds</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Invoke function on values of Series.</span>
<span class="sd"> Can be a Python function that only works on the Series.</span>
<span class="sd"> .. note:: this API executes the function once to infer the type which is</span>
<span class="sd"> potentially expensive, for instance, when the dataset is created after</span>
<span class="sd"> aggregations or sorting.</span>
<span class="sd"> To avoid this, specify return type in ``func``, for instance, as below:</span>
<span class="sd"> &gt;&gt;&gt; def square(x) -&gt; np.int32:</span>
<span class="sd"> ... return x ** 2</span>
<span class="sd"> pandas-on-Spark uses return type hint and does not try to infer the type.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> func : function</span>
<span class="sd"> Python function to apply. A return type hint is recommended; if omitted, the return type is inferred.</span>
<span class="sd"> args : tuple</span>
<span class="sd"> Positional arguments passed to func after the series value.</span>
<span class="sd"> **kwds</span>
<span class="sd"> Additional keyword arguments passed to func.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.aggregate : Only perform aggregating type operations.</span>
<span class="sd"> Series.transform : Only perform transforming type operations.</span>
<span class="sd"> DataFrame.apply : The equivalent function for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Create a Series with typical summer temperatures for each city.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([20, 21, 12],</span>
<span class="sd"> ... index=[&#39;London&#39;, &#39;New York&#39;, &#39;Helsinki&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> London 20</span>
<span class="sd"> New York 21</span>
<span class="sd"> Helsinki 12</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Square the values by defining a function and passing it as an</span>
<span class="sd"> argument to ``apply()``.</span>
<span class="sd"> &gt;&gt;&gt; def square(x) -&gt; np.int64:</span>
<span class="sd"> ... return x ** 2</span>
<span class="sd"> &gt;&gt;&gt; s.apply(square)</span>
<span class="sd"> London 400</span>
<span class="sd"> New York 441</span>
<span class="sd"> Helsinki 144</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Define a custom function that needs additional positional</span>
<span class="sd"> arguments and pass these additional arguments using the</span>
<span class="sd"> ``args`` keyword</span>
<span class="sd"> &gt;&gt;&gt; def subtract_custom_value(x, custom_value) -&gt; np.int64:</span>
<span class="sd"> ... return x - custom_value</span>
<span class="sd"> &gt;&gt;&gt; s.apply(subtract_custom_value, args=(5,))</span>
<span class="sd"> London 15</span>
<span class="sd"> New York 16</span>
<span class="sd"> Helsinki 7</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Define a custom function that takes keyword arguments</span>
<span class="sd"> and pass these arguments to ``apply``</span>
<span class="sd"> &gt;&gt;&gt; def add_custom_values(x, **kwargs) -&gt; np.int64:</span>
<span class="sd"> ... for month in kwargs:</span>
<span class="sd"> ... x += kwargs[month]</span>
<span class="sd"> ... return x</span>
<span class="sd"> &gt;&gt;&gt; s.apply(add_custom_values, june=30, july=20, august=25)</span>
<span class="sd"> London 95</span>
<span class="sd"> New York 96</span>
<span class="sd"> Helsinki 87</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Use a function from the Numpy library</span>
<span class="sd"> &gt;&gt;&gt; def numpy_log(col) -&gt; np.float64:</span>
<span class="sd"> ... return np.log(col)</span>
<span class="sd"> &gt;&gt;&gt; s.apply(numpy_log)</span>
<span class="sd"> London 2.995732</span>
<span class="sd"> New York 3.044522</span>
<span class="sd"> Helsinki 2.484907</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> You can omit the type hint and let pandas-on-Spark infer its type.</span>
<span class="sd"> &gt;&gt;&gt; s.apply(np.log)</span>
<span class="sd"> London 2.995732</span>
<span class="sd"> New York 3.044522</span>
<span class="sd"> Helsinki 2.484907</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">callable</span><span class="p">(</span><span class="n">func</span><span class="p">),</span> <span class="s2">&quot;the first argument should be a callable function.&quot;</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">spec</span> <span class="o">=</span> <span class="n">inspect</span><span class="o">.</span><span class="n">getfullargspec</span><span class="p">(</span><span class="n">func</span><span class="p">)</span>
<span class="n">return_sig</span> <span class="o">=</span> <span class="n">spec</span><span class="o">.</span><span class="n">annotations</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;return&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="n">should_infer_schema</span> <span class="o">=</span> <span class="n">return_sig</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="k">except</span> <span class="ne">TypeError</span><span class="p">:</span>
<span class="c1"># Falls back to schema inference if it fails to get signature.</span>
<span class="n">should_infer_schema</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">def</span> <span class="nf">apply_each</span><span class="p">(</span><span class="n">s</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">:</span>
<span class="k">return</span> <span class="n">s</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwds</span><span class="p">)</span>
<span class="k">if</span> <span class="n">should_infer_schema</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">pandas_on_spark</span><span class="o">.</span><span class="n">_transform_batch</span><span class="p">(</span><span class="n">apply_each</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">sig_return</span> <span class="o">=</span> <span class="n">infer_return_type</span><span class="p">(</span><span class="n">func</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sig_return</span><span class="p">,</span> <span class="n">ScalarType</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;Expected the return type of this function to be of scalar type, &quot;</span>
<span class="s2">&quot;but found type </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sig_return</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">return_type</span> <span class="o">=</span> <span class="n">sig_return</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">pandas_on_spark</span><span class="o">.</span><span class="n">_transform_batch</span><span class="p">(</span><span class="n">apply_each</span><span class="p">,</span> <span class="n">return_type</span><span class="p">)</span></div>
<span class="c1"># TODO: not all of pandas&#39; arguments are implemented for now.</span>
<div class="viewcode-block" id="Series.aggregate"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.aggregate.html#pyspark.pandas.Series.aggregate">[docs]</a> <span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Aggregate using one or more operations over the specified axis.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> func : str or a list of str</span>
<span class="sd"> Name(s) of the function(s), given as string(s), to apply to the Series.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> scalar, Series</span>
<span class="sd"> The return can be:</span>
<span class="sd"> - scalar : when Series.agg is called with single function</span>
<span class="sd"> - Series : when Series.agg is called with several functions</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> `agg` is an alias for `aggregate`. Use the alias.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.apply : Invoke function on a Series.</span>
<span class="sd"> Series.transform : Only perform transforming type operations.</span>
<span class="sd"> Series.groupby : Perform operations over groups.</span>
<span class="sd"> DataFrame.aggregate : The equivalent function for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s.agg(&#39;min&#39;)</span>
<span class="sd"> 1</span>
<span class="sd"> &gt;&gt;&gt; s.agg([&#39;min&#39;, &#39;max&#39;]).sort_index()</span>
<span class="sd"> max 4</span>
<span class="sd"> min 1</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">func</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">return</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">)()</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;func must be a string or list of strings&quot;</span><span class="p">)</span></div>
<span class="n">agg</span> <span class="o">=</span> <span class="n">aggregate</span>
<span class="k">def</span> <span class="nf">transpose</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the transpose, which is self.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> It returns the same object as the transpose of the given series object, which is by</span>
<span class="sd"> definition self.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.transpose()</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">T</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">transpose</span><span class="p">)</span>
<div class="viewcode-block" id="Series.transform"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.transform.html#pyspark.pandas.Series.transform">[docs]</a> <span class="k">def</span> <span class="nf">transform</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Callable</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Callable</span><span class="p">]],</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Call ``func`` producing the same type as `self` with transformed values</span>
<span class="sd"> and that has the same axis length as input.</span>
<span class="sd"> .. note:: this API executes the function once to infer the type which is</span>
<span class="sd"> potentially expensive, for instance, when the dataset is created after</span>
<span class="sd"> aggregations or sorting.</span>
<span class="sd"> To avoid this, specify return type in ``func``, for instance, as below:</span>
<span class="sd"> &gt;&gt;&gt; def square(x) -&gt; np.int32:</span>
<span class="sd"> ... return x ** 2</span>
<span class="sd"> pandas-on-Spark uses return type hint and does not try to infer the type.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> func : function or list</span>
<span class="sd"> A function or a list of functions to use for transforming the data.</span>
<span class="sd"> axis : int, default 0 or &#39;index&#39;</span>
<span class="sd"> Can only be set to 0 now.</span>
<span class="sd"> *args</span>
<span class="sd"> Positional arguments to pass to `func`.</span>
<span class="sd"> **kwargs</span>
<span class="sd"> Keyword arguments to pass to `func`.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> An instance of the same type as `self` that must have the same length as input.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.aggregate : Only perform aggregating type operations.</span>
<span class="sd"> Series.apply : Invoke function on Series.</span>
<span class="sd"> DataFrame.transform : The equivalent function for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(range(3))</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; def sqrt(x) -&gt; float:</span>
<span class="sd"> ... return np.sqrt(x)</span>
<span class="sd"> &gt;&gt;&gt; s.transform(sqrt)</span>
<span class="sd"> 0 0.000000</span>
<span class="sd"> 1 1.000000</span>
<span class="sd"> 2 1.414214</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Even though the resulting instance must have the same length as the</span>
<span class="sd"> input, it is possible to provide several input functions:</span>
<span class="sd"> &gt;&gt;&gt; def exp(x) -&gt; float:</span>
<span class="sd"> ... return np.exp(x)</span>
<span class="sd"> &gt;&gt;&gt; s.transform([sqrt, exp])</span>
<span class="sd"> sqrt exp</span>
<span class="sd"> 0 0.000000 1.000000</span>
<span class="sd"> 1 1.000000 2.718282</span>
<span class="sd"> 2 1.414214 7.389056</span>
<span class="sd"> You can omit the type hint and let pandas-on-Spark infer its type.</span>
<span class="sd"> &gt;&gt;&gt; s.transform([np.sqrt, np.exp])</span>
<span class="sd"> sqrt exp</span>
<span class="sd"> 0 0.000000 1.000000</span>
<span class="sd"> 1 1.000000 2.718282</span>
<span class="sd"> 2 1.414214 7.389056</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span>
<span class="k">if</span> <span class="n">axis</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">&#39;axis should be either 0 or &quot;index&quot; currently.&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
<span class="n">applied</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">func</span><span class="p">:</span>
<span class="n">applied</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="vm">__name__</span><span class="p">))</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_columns</span><span class="p">(</span><span class="n">applied</span><span class="p">)</span>
<span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.round"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.round.html#pyspark.pandas.Series.round">[docs]</a> <span class="k">def</span> <span class="nf">round</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">decimals</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Round each value in a Series to the given number of decimals.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> decimals : int</span>
<span class="sd"> Number of decimal places to round to (default: 0).</span>
<span class="sd"> If decimals are negative, it specifies the number of</span>
<span class="sd"> positions to the left of the decimal point.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series object</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> DataFrame.round</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; df = ps.Series([0.028208, 0.038683, 0.877076], name=&#39;x&#39;)</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> 0 0.028208</span>
<span class="sd"> 1 0.038683</span>
<span class="sd"> 2 0.877076</span>
<span class="sd"> Name: x, dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; df.round(2)</span>
<span class="sd"> 0 0.03</span>
<span class="sd"> 1 0.04</span>
<span class="sd"> 2 0.88</span>
<span class="sd"> Name: x, dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">decimals</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;decimals must be an integer&quot;</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="n">decimals</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span>
<span class="n">scol</span><span class="p">,</span>
<span class="n">field</span><span class="o">=</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">nullable</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">DecimalType</span><span class="p">)</span>
<span class="k">else</span> <span class="kc">None</span>
<span class="p">),</span>
<span class="p">)</span></div>
<span class="c1"># TODO: add &#39;interpolation&#39; parameter.</span>
<div class="viewcode-block" id="Series.quantile"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.quantile.html#pyspark.pandas.Series.quantile">[docs]</a> <span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">q</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="n">Iterable</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="mf">0.5</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10000</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return value at the given quantile.</span>
<span class="sd"> .. note:: Unlike pandas&#39;, the quantile in pandas-on-Spark is an approximated quantile</span>
<span class="sd"> based upon approximate percentile computation because computing quantile across</span>
<span class="sd"> a large dataset is extremely expensive.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> q : float or array-like, default 0.5 (50% quantile)</span>
<span class="sd"> 0 &lt;= q &lt;= 1, the quantile(s) to compute.</span>
<span class="sd"> accuracy : int, optional</span>
<span class="sd"> Default accuracy of approximation. Larger value means better accuracy.</span>
<span class="sd"> The relative error can be deduced by 1.0 / accuracy.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> float or Series</span>
<span class="sd"> If the current object is a Series and ``q`` is an array, a Series will be</span>
<span class="sd"> returned where the index is ``q`` and the values are the quantiles, otherwise</span>
<span class="sd"> a float will be returned.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.quantile(.5)</span>
<span class="sd"> 3.0</span>
<span class="sd"> &gt;&gt;&gt; (s + 1).quantile(.5)</span>
<span class="sd"> 4.0</span>
<span class="sd"> &gt;&gt;&gt; s.quantile([.25, .5, .75])</span>
<span class="sd"> 0.25 2.0</span>
<span class="sd"> 0.50 3.0</span>
<span class="sd"> 0.75 4.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; (s + 1).quantile([.25, .5, .75])</span>
<span class="sd"> 0.25 3.0</span>
<span class="sd"> 0.50 4.0</span>
<span class="sd"> 0.75 5.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">Iterable</span><span class="p">):</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span>
<span class="n">cast</span><span class="p">(</span>
<span class="s2">&quot;ps.DataFrame&quot;</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="n">q</span><span class="o">=</span><span class="n">q</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">numeric_only</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">accuracy</span><span class="o">=</span><span class="n">accuracy</span><span class="p">),</span>
<span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">accuracy</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;accuracy must be an integer; however, got [</span><span class="si">%s</span><span class="s2">]&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">accuracy</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span>
<span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;q must be a float or an array of floats; however, [</span><span class="si">%s</span><span class="s2">] found.&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">q</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">q_float</span> <span class="o">=</span> <span class="n">q</span>
<span class="k">if</span> <span class="n">q_float</span> <span class="o">&lt;</span> <span class="mf">0.0</span> <span class="ow">or</span> <span class="n">q_float</span> <span class="o">&gt;</span> <span class="mf">1.0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;percentiles should all be in the interval [0, 1].&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="n">Series</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">PySparkColumn</span><span class="p">:</span>
<span class="n">spark_type</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span>
<span class="n">spark_column</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="p">(</span><span class="n">BooleanType</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">,</span> <span class="n">NullType</span><span class="p">)):</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">percentile_approx</span><span class="p">(</span><span class="n">spark_column</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">DoubleType</span><span class="p">()),</span> <span class="n">q_float</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">spark_type</span><span class="p">),</span> <span class="n">spark_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">()</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span><span class="n">quantile</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;quantile&quot;</span><span class="p">)</span></div>
<span class="c1"># TODO: add axis, pct, na_option parameter</span>
<div class="viewcode-block" id="Series.rank"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rank.html#pyspark.pandas.Series.rank">[docs]</a> <span class="k">def</span> <span class="nf">rank</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">method</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;average&quot;</span><span class="p">,</span> <span class="n">ascending</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compute numerical data ranks (1 through n) along axis. Equal values are</span>
<span class="sd"> assigned a rank that is the average of the ranks of those values.</span>
<span class="sd"> .. note:: the current implementation of rank uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to moving all data into</span>
<span class="sd"> a single partition in a single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method with very large datasets.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> method : {&#39;average&#39;, &#39;min&#39;, &#39;max&#39;, &#39;first&#39;, &#39;dense&#39;}</span>
<span class="sd"> * average: average rank of group</span>
<span class="sd"> * min: lowest rank in group</span>
<span class="sd"> * max: highest rank in group</span>
<span class="sd"> * first: ranks assigned in order they appear in the array</span>
<span class="sd"> * dense: like &#39;min&#39;, but rank always increases by 1 between groups</span>
<span class="sd"> ascending : boolean, default True</span>
<span class="sd"> False for ranks by high (1) to low (N)</span>
<span class="sd"> numeric_only : bool, default False</span>
<span class="sd"> For DataFrame objects, rank only numeric columns if set to True.</span>
<span class="sd"> .. versionchanged:: 4.0.0</span>
<span class="sd"> The default value of ``numeric_only`` is now ``False``.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> ranks : same type as caller</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 2, 3], name=&#39;A&#39;)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> Name: A, dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rank()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.5</span>
<span class="sd"> 2 2.5</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> Name: A, dtype: float64</span>
<span class="sd"> If method is set to &#39;min&#39;, it uses lowest rank in group.</span>
<span class="sd"> &gt;&gt;&gt; s.rank(method=&#39;min&#39;)</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> Name: A, dtype: float64</span>
<span class="sd"> If method is set to &#39;max&#39;, it uses highest rank in group.</span>
<span class="sd"> &gt;&gt;&gt; s.rank(method=&#39;max&#39;)</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 3.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> Name: A, dtype: float64</span>
<span class="sd"> If method is set to &#39;first&#39;, ranks are assigned in the order the values appear.</span>
<span class="sd"> &gt;&gt;&gt; s.rank(method=&#39;first&#39;)</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> Name: A, dtype: float64</span>
<span class="sd"> If method is set to &#39;dense&#39;, it leaves no gaps between groups.</span>
<span class="sd"> &gt;&gt;&gt; s.rank(method=&#39;dense&#39;)</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 3.0</span>
<span class="sd"> Name: A, dtype: float64</span>
<span class="sd"> If numeric_only is set to True, only a numeric or boolean Series can be ranked;</span>
<span class="sd"> a TypeError is raised otherwise.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;a&#39;, &#39;b&#39;, &#39;c&#39;], name=&#39;A&#39;, index=[&#39;x&#39;, &#39;y&#39;, &#39;z&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> x a</span>
<span class="sd"> y b</span>
<span class="sd"> z c</span>
<span class="sd"> Name: A, dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">is_numeric</span> <span class="o">=</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="p">(</span><span class="n">NumericType</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">))</span>
<span class="k">if</span> <span class="n">numeric_only</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">is_numeric</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;Series.rank does not allow numeric_only=True with non-numeric dtype.&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_rank</span><span class="p">(</span><span class="n">method</span><span class="p">,</span> <span class="n">ascending</span><span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">analyzed</span></div>
<span class="k">def</span> <span class="nf">_rank</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">method</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;average&quot;</span><span class="p">,</span>
<span class="n">ascending</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">part_cols</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">(),</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="n">method</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;average&quot;</span><span class="p">,</span> <span class="s2">&quot;min&quot;</span><span class="p">,</span> <span class="s2">&quot;max&quot;</span><span class="p">,</span> <span class="s2">&quot;first&quot;</span><span class="p">,</span> <span class="s2">&quot;dense&quot;</span><span class="p">]:</span>
<span class="n">msg</span> <span class="o">=</span> <span class="s2">&quot;method must be one of &#39;average&#39;, &#39;min&#39;, &#39;max&#39;, &#39;first&#39;, &#39;dense&#39;&quot;</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_level</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;rank do not support MultiIndex now&quot;</span><span class="p">)</span>
<span class="n">Column</span> <span class="o">=</span> <span class="n">get_column_class</span><span class="p">()</span>
<span class="k">if</span> <span class="n">ascending</span><span class="p">:</span>
<span class="n">asc_func</span> <span class="o">=</span> <span class="n">Column</span><span class="o">.</span><span class="n">asc</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">asc_func</span> <span class="o">=</span> <span class="n">Column</span><span class="o">.</span><span class="n">desc</span>
<span class="k">if</span> <span class="n">method</span> <span class="o">==</span> <span class="s2">&quot;first&quot;</span><span class="p">:</span>
<span class="n">window</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span>
<span class="n">asc_func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">),</span>
<span class="n">asc_func</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)),</span>
<span class="p">)</span>
<span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s2">&quot;dense&quot;</span><span class="p">:</span>
<span class="n">window</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">asc_func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">))</span>
<span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">dense_rank</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">method</span> <span class="o">==</span> <span class="s2">&quot;average&quot;</span><span class="p">:</span>
<span class="n">stat_func</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">mean</span>
<span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s2">&quot;min&quot;</span><span class="p">:</span>
<span class="n">stat_func</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">min</span>
<span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s2">&quot;max&quot;</span><span class="p">:</span>
<span class="n">stat_func</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">max</span>
<span class="n">window1</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">asc_func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">))</span>
<span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">window2</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span>
<span class="n">cast</span><span class="p">(</span><span class="s2">&quot;List[ColumnOrName]&quot;</span><span class="p">,</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">])</span> <span class="o">+</span> <span class="nb">list</span><span class="p">(</span><span class="n">part_cols</span><span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">unboundedFollowing</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">stat_func</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window1</span><span class="p">))</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window2</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">DoubleType</span><span class="p">()))</span>
<div class="viewcode-block" id="Series.filter"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.filter.html#pyspark.pandas.Series.filter">[docs]</a> <span class="k">def</span> <span class="nf">filter</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">items</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Sequence</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">like</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">regex</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span>
<span class="k">if</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Series does not support columns axis.&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">items</span><span class="o">=</span><span class="n">items</span><span class="p">,</span> <span class="n">like</span><span class="o">=</span><span class="n">like</span><span class="p">,</span> <span class="n">regex</span><span class="o">=</span><span class="n">regex</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<span class="nb">filter</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="o">.</span><span class="n">filter</span><span class="o">.</span><span class="vm">__doc__</span>
<div class="viewcode-block" id="Series.describe"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.describe.html#pyspark.pandas.Series.describe">[docs]</a> <span class="k">def</span> <span class="nf">describe</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">percentiles</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">percentiles</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<span class="n">describe</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="o">.</span><span class="n">describe</span><span class="o">.</span><span class="vm">__doc__</span>
<div class="viewcode-block" id="Series.diff"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.diff.html#pyspark.pandas.Series.diff">[docs]</a> <span class="k">def</span> <span class="nf">diff</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">periods</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> First discrete difference of element.</span>
<span class="sd"> Calculates the difference of a Series element compared with another element in the</span>
<span class="sd"> Series (default is the element in the previous row).</span>
<span class="sd"> .. note:: the current implementation of diff uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to moving all data into</span>
<span class="sd"> a single partition in a single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method with very large datasets.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> periods : int, default 1</span>
<span class="sd"> Periods to shift for calculating difference, accepts negative values.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> diffed : Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4, 5, 6],</span>
<span class="sd"> ... &#39;b&#39;: [1, 1, 2, 3, 5, 8],</span>
<span class="sd"> ... &#39;c&#39;: [1, 4, 9, 16, 25, 36]}, columns=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> a b c</span>
<span class="sd"> 0 1 1 1</span>
<span class="sd"> 1 2 1 4</span>
<span class="sd"> 2 3 2 9</span>
<span class="sd"> 3 4 3 16</span>
<span class="sd"> 4 5 5 25</span>
<span class="sd"> 5 6 8 36</span>
<span class="sd"> &gt;&gt;&gt; df.b.diff()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 0.0</span>
<span class="sd"> 2 1.0</span>
<span class="sd"> 3 1.0</span>
<span class="sd"> 4 2.0</span>
<span class="sd"> 5 3.0</span>
<span class="sd"> Name: b, dtype: float64</span>
<span class="sd"> Difference with the 3rd previous value</span>
<span class="sd"> &gt;&gt;&gt; df.c.diff(periods=3)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> 3 15.0</span>
<span class="sd"> 4 21.0</span>
<span class="sd"> 5 27.0</span>
<span class="sd"> Name: c, dtype: float64</span>
<span class="sd"> Difference with following value</span>
<span class="sd"> &gt;&gt;&gt; df.c.diff(periods=-1)</span>
<span class="sd"> 0 -3.0</span>
<span class="sd"> 1 -5.0</span>
<span class="sd"> 2 -7.0</span>
<span class="sd"> 3 -9.0</span>
<span class="sd"> 4 -11.0</span>
<span class="sd"> 5 NaN</span>
<span class="sd"> Name: c, dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_diff</span><span class="p">(</span><span class="n">periods</span><span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">analyzed</span></div>
<span class="k">def</span> <span class="nf">_diff</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">periods</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">part_cols</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">())</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">periods</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;periods should be an int; however, got [</span><span class="si">%s</span><span class="s2">]&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">periods</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
<span class="n">window</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="o">-</span><span class="n">periods</span><span class="p">,</span> <span class="o">-</span><span class="n">periods</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="o">-</span> <span class="n">F</span><span class="o">.</span><span class="n">lag</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="n">periods</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">field</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">nullable</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span>
<div class="viewcode-block" id="Series.idxmax"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.idxmax.html#pyspark.pandas.Series.idxmax">[docs]</a> <span class="k">def</span> <span class="nf">idxmax</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Tuple</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the row label of the maximum value.</span>
<span class="sd"> If multiple values equal the maximum, the first row label with that</span>
<span class="sd"> value is returned.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> skipna : bool, default True</span>
<span class="sd"> Exclude NA/null values. If the entire Series is NA, the result</span>
<span class="sd"> will be NA.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Index</span>
<span class="sd"> Label of the maximum value.</span>
<span class="sd"> Raises</span>
<span class="sd"> ------</span>
<span class="sd"> ValueError</span>
<span class="sd"> If the Series is empty.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.idxmin : Return index *label* of the first occurrence</span>
<span class="sd"> of minimum of values.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data=[1, None, 4, 3, 5],</span>
<span class="sd"> ... index=[&#39;A&#39;, &#39;B&#39;, &#39;C&#39;, &#39;D&#39;, &#39;E&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> A 1.0</span>
<span class="sd"> B NaN</span>
<span class="sd"> C 4.0</span>
<span class="sd"> D 3.0</span>
<span class="sd"> E 5.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.idxmax()</span>
<span class="sd"> &#39;E&#39;</span>
<span class="sd"> If `skipna` is False and there is an NA value in the data,</span>
<span class="sd"> the function returns ``nan``.</span>
<span class="sd"> &gt;&gt;&gt; s.idxmax(skipna=False)</span>
<span class="sd"> nan</span>
<span class="sd"> In case of multi-index, you get a tuple:</span>
<span class="sd"> &gt;&gt;&gt; index = pd.MultiIndex.from_arrays([</span>
<span class="sd"> ... [&#39;a&#39;, &#39;a&#39;, &#39;b&#39;, &#39;b&#39;], [&#39;c&#39;, &#39;d&#39;, &#39;e&#39;, &#39;f&#39;]], names=(&#39;first&#39;, &#39;second&#39;))</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data=[1, None, 4, 5], index=index)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> first second</span>
<span class="sd"> a c 1.0</span>
<span class="sd"> d NaN</span>
<span class="sd"> b e 4.0</span>
<span class="sd"> f 5.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.idxmax()</span>
<span class="sd"> (&#39;b&#39;, &#39;f&#39;)</span>
<span class="sd"> If multiple values equal the maximum, the first row label with that</span>
<span class="sd"> value is returned.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 100, 1, 100, 1, 100], index=[10, 3, 5, 2, 1, 8])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 10 1</span>
<span class="sd"> 3 100</span>
<span class="sd"> 5 1</span>
<span class="sd"> 2 100</span>
<span class="sd"> 1 1</span>
<span class="sd"> 8 100</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.idxmax()</span>
<span class="sd"> 3</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">index_scols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span>
<span class="k">if</span> <span class="n">skipna</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">desc_nulls_last</span><span class="p">(),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">desc_nulls_first</span><span class="p">(),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="n">results</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">([</span><span class="n">scol</span><span class="p">]</span> <span class="o">+</span> <span class="n">index_scols</span><span class="p">)</span><span class="o">.</span><span class="n">take</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">results</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;attempt to get idxmax of an empty sequence&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># This happens when skipna is False and a null exists (nulls are placed</span>
<span class="c1"># first), or when every value is null (only nulls remain to be taken).</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;The behavior of Series.idxmax with all-NA values, or any-NA and skipna=False, &quot;</span>
<span class="s2">&quot;is deprecated. In a future version this will raise ValueError&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="n">values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">:])</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">values</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">return</span> <span class="n">values</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">values</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.idxmin"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.idxmin.html#pyspark.pandas.Series.idxmin">[docs]</a> <span class="k">def</span> <span class="nf">idxmin</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Tuple</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the row label of the minimum value.</span>
<span class="sd"> If multiple values equal the minimum, the first row label with that</span>
<span class="sd"> value is returned.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> skipna : bool, default True</span>
<span class="sd"> Exclude NA/null values. If the entire Series is NA, the result</span>
<span class="sd"> will be NA.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Index</span>
<span class="sd"> Label of the minimum value.</span>
<span class="sd"> Raises</span>
<span class="sd"> ------</span>
<span class="sd"> ValueError</span>
<span class="sd"> If the Series is empty.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.idxmax : Return index *label* of the first occurrence</span>
<span class="sd"> of maximum of values.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> This method is the Series version of ``ndarray.argmin``. This method</span>
<span class="sd"> returns the label of the minimum, while ``ndarray.argmin`` returns</span>
<span class="sd"> the position. To get the position, use ``series.values.argmin()``.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data=[1, None, 4, 0],</span>
<span class="sd"> ... index=[&#39;A&#39;, &#39;B&#39;, &#39;C&#39;, &#39;D&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> A 1.0</span>
<span class="sd"> B NaN</span>
<span class="sd"> C 4.0</span>
<span class="sd"> D 0.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.idxmin()</span>
<span class="sd"> &#39;D&#39;</span>
<span class="sd"> If `skipna` is False and there is an NA value in the data,</span>
<span class="sd"> the function returns ``nan``.</span>
<span class="sd"> &gt;&gt;&gt; s.idxmin(skipna=False)</span>
<span class="sd"> nan</span>
<span class="sd"> In case of multi-index, you get a tuple:</span>
<span class="sd"> &gt;&gt;&gt; index = pd.MultiIndex.from_arrays([</span>
<span class="sd"> ... [&#39;a&#39;, &#39;a&#39;, &#39;b&#39;, &#39;b&#39;], [&#39;c&#39;, &#39;d&#39;, &#39;e&#39;, &#39;f&#39;]], names=(&#39;first&#39;, &#39;second&#39;))</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data=[1, None, 4, 0], index=index)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> first second</span>
<span class="sd"> a c 1.0</span>
<span class="sd"> d NaN</span>
<span class="sd"> b e 4.0</span>
<span class="sd"> f 0.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.idxmin()</span>
<span class="sd"> (&#39;b&#39;, &#39;f&#39;)</span>
<span class="sd"> If multiple values equal the minimum, the first row label with that</span>
<span class="sd"> value is returned.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 100, 1, 100, 1, 100], index=[10, 3, 5, 2, 1, 8])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 10 1</span>
<span class="sd"> 3 100</span>
<span class="sd"> 5 1</span>
<span class="sd"> 2 100</span>
<span class="sd"> 1 1</span>
<span class="sd"> 8 100</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.idxmin()</span>
<span class="sd"> 10</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">index_scols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span>
<span class="k">if</span> <span class="n">skipna</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">asc_nulls_last</span><span class="p">(),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">asc_nulls_first</span><span class="p">(),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="n">results</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">([</span><span class="n">scol</span><span class="p">]</span> <span class="o">+</span> <span class="n">index_scols</span><span class="p">)</span><span class="o">.</span><span class="n">take</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">results</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;attempt to get idxmin of an empty sequence&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># The first row is null either when skipna is False and a null exists</span>
<span class="c1"># (nulls are ordered first), or when every value is null.</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;The behavior of Series.idxmin with all-NA values, or any-NA and skipna=False, &quot;</span>
<span class="s2">&quot;is deprecated. In a future version this will raise ValueError&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="n">values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">:])</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">values</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">return</span> <span class="n">values</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">values</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.pop"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.pop.html#pyspark.pandas.Series.pop">[docs]</a> <span class="k">def</span> <span class="nf">pop</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="n">Name</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">Scalar</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return item and drop from series.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> item : label</span>
<span class="sd"> Label of index to be popped.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Value that is popped from series.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data=np.arange(3), index=[&#39;A&#39;, &#39;B&#39;, &#39;C&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> A 0</span>
<span class="sd"> B 1</span>
<span class="sd"> C 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.pop(&#39;A&#39;)</span>
<span class="sd"> 0</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> B 1</span>
<span class="sd"> C 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data=np.arange(3), index=[&#39;A&#39;, &#39;A&#39;, &#39;C&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> A 0</span>
<span class="sd"> A 1</span>
<span class="sd"> C 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.pop(&#39;A&#39;)</span>
<span class="sd"> A 0</span>
<span class="sd"> A 1</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> C 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> MultiIndex is also supported.</span>
<span class="sd"> &gt;&gt;&gt; midx = pd.MultiIndex([[&#39;lama&#39;, &#39;cow&#39;, &#39;falcon&#39;],</span>
<span class="sd"> ... [&#39;speed&#39;, &#39;weight&#39;, &#39;length&#39;]],</span>
<span class="sd"> ... [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd"> ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],</span>
<span class="sd"> ... index=midx)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> lama speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.pop(&#39;lama&#39;)</span>
<span class="sd"> speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> A MultiIndex with more than two levels is also supported.</span>
<span class="sd"> &gt;&gt;&gt; midx = pd.MultiIndex([[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;],</span>
<span class="sd"> ... [&#39;lama&#39;, &#39;cow&#39;, &#39;falcon&#39;],</span>
<span class="sd"> ... [&#39;speed&#39;, &#39;weight&#39;, &#39;length&#39;]],</span>
<span class="sd"> ... [[0, 0, 0, 0, 0, 0, 1, 1, 1],</span>
<span class="sd"> ... [0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd"> ... [0, 1, 2, 0, 1, 2, 0, 0, 2]]</span>
<span class="sd"> ... )</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],</span>
<span class="sd"> ... index=midx)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> a lama speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> b falcon speed 320.0</span>
<span class="sd"> speed 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.pop((&#39;a&#39;, &#39;lama&#39;))</span>
<span class="sd"> speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> a cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> b falcon speed 320.0</span>
<span class="sd"> speed 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.pop((&#39;b&#39;, &#39;falcon&#39;, &#39;speed&#39;))</span>
<span class="sd"> (b, falcon, speed) 320.0</span>
<span class="sd"> (b, falcon, speed) 1.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_name_like_value</span><span class="p">(</span><span class="n">item</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;&#39;key&#39; should be string or tuple that contains strings&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_name_like_tuple</span><span class="p">(</span><span class="n">item</span><span class="p">):</span>
<span class="n">item</span> <span class="o">=</span> <span class="p">(</span><span class="n">item</span><span class="p">,)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_level</span> <span class="o">&lt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span>
<span class="s2">&quot;Key length (</span><span class="si">{}</span><span class="s2">) exceeds index depth (</span><span class="si">{}</span><span class="s2">)&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_level</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span>
<span class="n">scols</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="p">:]</span> <span class="o">+</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">]</span>
<span class="n">rows</span> <span class="o">=</span> <span class="p">[</span><span class="n">internal</span><span class="o">.</span><span class="n">spark_columns</span><span class="p">[</span><span class="n">level</span><span class="p">]</span> <span class="o">==</span> <span class="n">index</span> <span class="k">for</span> <span class="n">level</span><span class="p">,</span> <span class="n">index</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">item</span><span class="p">)]</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">rows</span><span class="p">))</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">scols</span><span class="p">)</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_drop</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_level</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">):</span>
<span class="c1"># if exactly one row matches, return the bare value instead of a Series</span>
<span class="n">pdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">toPandas</span><span class="p">()</span>
<span class="n">length</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">pdf</span><span class="p">)</span>
<span class="k">if</span> <span class="n">length</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">val</span> <span class="o">=</span> <span class="n">pdf</span><span class="p">[</span><span class="n">internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">CategoricalDtype</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="o">.</span><span class="n">categories</span><span class="p">[</span><span class="n">val</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">val</span>
<span class="n">item_string</span> <span class="o">=</span> <span class="n">name_like_string</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="n">SPARK_DEFAULT_INDEX_NAME</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">item_string</span><span class="p">)))</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">SPARK_DEFAULT_INDEX_NAME</span><span class="p">)],</span>
<span class="n">column_labels</span><span class="o">=</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span>
<span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_column_names</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="p">:]</span>
<span class="p">],</span>
<span class="n">index_fields</span><span class="o">=</span><span class="n">internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="p">:],</span>
<span class="n">index_names</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_names</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="p">:],</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span></div>
<div class="viewcode-block" id="Series.copy"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.copy.html#pyspark.pandas.Series.copy">[docs]</a> <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Make a copy of this object&#39;s indices and data.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> deep : bool, default True</span>
<span class="sd"> Not supported; a dummy parameter that exists only to match pandas.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> copy : Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2], index=[&quot;a&quot;, &quot;b&quot;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> a 1</span>
<span class="sd"> b 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s_copy = s.copy()</span>
<span class="sd"> &gt;&gt;&gt; s_copy</span>
<span class="sd"> a 1</span>
<span class="sd"> b 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="p">))</span></div>
<div class="viewcode-block" id="Series.mode"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.mode.html#pyspark.pandas.Series.mode">[docs]</a> <span class="k">def</span> <span class="nf">mode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dropna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the mode(s) of the dataset.</span>
<span class="sd"> Always returns Series even if only one value is returned.</span>
<span class="sd"> .. versionchanged:: 3.4.0</span>
<span class="sd"> Series name is preserved to follow pandas 1.4+ behavior.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> dropna : bool, default True</span>
<span class="sd"> Don&#39;t consider counts of NaN/NaT.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Modes of the Series.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([0, 0, 1, 1, 1, np.nan, np.nan, np.nan])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 0.0</span>
<span class="sd"> 1 0.0</span>
<span class="sd"> 2 1.0</span>
<span class="sd"> 3 1.0</span>
<span class="sd"> 4 1.0</span>
<span class="sd"> 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.mode()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> If several values are tied for the mode, all of them are returned</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3,</span>
<span class="sd"> ... np.nan, np.nan, np.nan])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 0.0</span>
<span class="sd"> 1 0.0</span>
<span class="sd"> 2 1.0</span>
<span class="sd"> 3 1.0</span>
<span class="sd"> 4 1.0</span>
<span class="sd"> 5 2.0</span>
<span class="sd"> 6 2.0</span>
<span class="sd"> 7 2.0</span>
<span class="sd"> 8 3.0</span>
<span class="sd"> 9 3.0</span>
<span class="sd"> 10 3.0</span>
<span class="sd"> 11 NaN</span>
<span class="sd"> 12 NaN</span>
<span class="sd"> 13 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.mode().sort_values()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> With &#39;dropna&#39; set to &#39;False&#39;, we can also see NaN in the result</span>
<span class="sd"> &gt;&gt;&gt; s.mode(False).sort_values()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">name</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">SF</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">dropna</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name</span><span class="p">))</span>
<span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">array_sort</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">name</span><span class="p">))</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name</span><span class="p">))</span>
<span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">explode</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">name</span><span class="p">))</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name</span><span class="p">))</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="p">(</span><span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span> <span class="n">index_spark_columns</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">column_labels</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">])</span>
<span class="n">ser_mode</span> <span class="o">=</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="n">ser_mode</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span>
<span class="k">return</span> <span class="n">ser_mode</span></div>
<div class="viewcode-block" id="Series.keys"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.keys.html#pyspark.pandas.Series.keys">[docs]</a> <span class="k">def</span> <span class="nf">keys</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;ps.Index&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return alias for index.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Index</span>
<span class="sd"> Index of the Series.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; midx = pd.MultiIndex([[&#39;lama&#39;, &#39;cow&#39;, &#39;falcon&#39;],</span>
<span class="sd"> ... [&#39;speed&#39;, &#39;weight&#39;, &#39;length&#39;]],</span>
<span class="sd"> ... [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd"> ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx)</span>
<span class="sd"> &gt;&gt;&gt; psser.keys() # doctest: +SKIP</span>
<span class="sd"> MultiIndex([( &#39;lama&#39;, &#39;speed&#39;),</span>
<span class="sd"> ( &#39;lama&#39;, &#39;weight&#39;),</span>
<span class="sd"> ( &#39;lama&#39;, &#39;length&#39;),</span>
<span class="sd"> ( &#39;cow&#39;, &#39;speed&#39;),</span>
<span class="sd"> ( &#39;cow&#39;, &#39;weight&#39;),</span>
<span class="sd"> ( &#39;cow&#39;, &#39;length&#39;),</span>
<span class="sd"> (&#39;falcon&#39;, &#39;speed&#39;),</span>
<span class="sd"> (&#39;falcon&#39;, &#39;weight&#39;),</span>
<span class="sd"> (&#39;falcon&#39;, &#39;length&#39;)],</span>
<span class="sd"> )</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span></div>
<span class="c1"># TODO: introduce &#39;inplace&#39;; fully support &#39;regex&#39;</span>
<div class="viewcode-block" id="Series.replace"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.replace.html#pyspark.pandas.Series.replace">[docs]</a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">to_replace</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Any</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">,</span> <span class="n">Dict</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">value</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">List</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">regex</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Replace values given in to_replace with value.</span>
<span class="sd"> Values of the Series are replaced with other values dynamically.</span>
<span class="sd"> .. note:: For partial pattern matching, the replacement is against the whole string,</span>
<span class="sd"> which is different from pandas. That&#39;s by the nature of underlying Spark API.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> to_replace : str, list, tuple, dict, Series, int, float, or None</span>
<span class="sd"> How to find the values that will be replaced.</span>
<span class="sd"> * numeric, str:</span>
<span class="sd"> - numeric: numeric values equal to to_replace will be replaced with value</span>
<span class="sd"> - str: string exactly matching to_replace will be replaced with value</span>
<span class="sd"> * list of str or numeric:</span>
<span class="sd"> - if to_replace and value are both lists or tuples, they must be the same length.</span>
<span class="sd"> - str and numeric rules apply as above.</span>
<span class="sd"> * dict:</span>
<span class="sd"> - Dicts can be used to specify different replacement values for different</span>
<span class="sd"> existing values.</span>
<span class="sd"> For example, {&#39;a&#39;: &#39;b&#39;, &#39;y&#39;: &#39;z&#39;} replaces the value ‘a’ with ‘b’ and ‘y’</span>
<span class="sd"> with ‘z’. To use a dict in this way the value parameter should be None.</span>
<span class="sd"> - For a DataFrame a dict can specify that different values should be replaced</span>
<span class="sd"> in different columns. For example, {&#39;a&#39;: 1, &#39;b&#39;: &#39;z&#39;} looks for the value 1</span>
<span class="sd"> in column ‘a’ and the value ‘z’ in column ‘b’ and replaces these values with</span>
<span class="sd"> whatever is specified in value.</span>
<span class="sd"> The value parameter should not be None in this case.</span>
<span class="sd"> You can treat this as a special case of passing two lists except that you are</span>
<span class="sd"> specifying the column to search in.</span>
<span class="sd"> See the examples section for examples of each of these.</span>
<span class="sd"> value : scalar, dict, list, tuple, str, default None</span>
<span class="sd"> Value to replace any values matching to_replace with.</span>
<span class="sd"> For a DataFrame a dict of values can be used to specify which value to use</span>
<span class="sd"> for each column (columns not in the dict will not be filled).</span>
<span class="sd"> Regular expressions, strings and lists or dicts of such objects are also allowed.</span>
<span class="sd"> regex : bool or str, default False</span>
<span class="sd"> Whether to interpret to_replace and/or value as regular expressions.</span>
<span class="sd"> If this is True then to_replace must be a string.</span>
<span class="sd"> Alternatively, this could be a regular expression in which case to_replace must be None.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Object after replacement.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Scalar `to_replace` and `value`</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([0, 1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> 4 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.replace(0, 5)</span>
<span class="sd"> 0 5</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> 4 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> List-like `to_replace`</span>
<span class="sd"> &gt;&gt;&gt; s.replace([0, 4], 5000)</span>
<span class="sd"> 0 5000</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> 4 5000</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.replace([1, 2, 3], [10, 20, 30])</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 10</span>
<span class="sd"> 2 20</span>
<span class="sd"> 3 30</span>
<span class="sd"> 4 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Dict-like `to_replace`</span>
<span class="sd"> &gt;&gt;&gt; s.replace({1: 1000, 2: 2000, 3: 3000, 4: 4000})</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 1000</span>
<span class="sd"> 2 2000</span>
<span class="sd"> 3 3000</span>
<span class="sd"> 4 4000</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> A MultiIndex is also supported</span>
<span class="sd"> &gt;&gt;&gt; midx = pd.MultiIndex([[&#39;lama&#39;, &#39;cow&#39;, &#39;falcon&#39;],</span>
<span class="sd"> ... [&#39;speed&#39;, &#39;weight&#39;, &#39;length&#39;]],</span>
<span class="sd"> ... [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd"> ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],</span>
<span class="sd"> ... index=midx)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> lama speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.replace(45, 450)</span>
<span class="sd"> lama speed 450.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.replace([45, 30, 320], 500)</span>
<span class="sd"> lama speed 500.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 500.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 500.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.replace({45: 450, 30: 300})</span>
<span class="sd"> lama speed 450.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 300.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Regular expression `to_replace`</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([&#39;bat&#39;, &#39;foo&#39;, &#39;bait&#39;, &#39;abc&#39;, &#39;bar&#39;, &#39;zoo&#39;])</span>
<span class="sd"> &gt;&gt;&gt; psser.replace(to_replace=r&#39;^ba.$&#39;, value=&#39;new&#39;, regex=True)</span>
<span class="sd"> 0 new</span>
<span class="sd"> 1 foo</span>
<span class="sd"> 2 bait</span>
<span class="sd"> 3 abc</span>
<span class="sd"> 4 new</span>
<span class="sd"> 5 zoo</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; psser.replace(value=&#39;new&#39;, regex=r&#39;^.oo$&#39;)</span>
<span class="sd"> 0 bat</span>
<span class="sd"> 1 new</span>
<span class="sd"> 2 bait</span>
<span class="sd"> 3 abc</span>
<span class="sd"> 4 bar</span>
<span class="sd"> 5 new</span>
<span class="sd"> dtype: object</span>
<span class="sd"> For partial pattern matching, the replacement is against the whole string</span>
<span class="sd"> &gt;&gt;&gt; psser.replace(&#39;ba&#39;, &#39;xx&#39;, regex=True)</span>
<span class="sd"> 0 xx</span>
<span class="sd"> 1 foo</span>
<span class="sd"> 2 xx</span>
<span class="sd"> 3 abc</span>
<span class="sd"> 4 xx</span>
<span class="sd"> 5 zoo</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">regex</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">if</span> <span class="n">to_replace</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;&#39;to_replace&#39; must be &#39;None&#39; if &#39;regex&#39; is not a bool&quot;</span><span class="p">)</span>
<span class="n">to_replace</span> <span class="o">=</span> <span class="n">regex</span>
<span class="n">regex</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">regex</span><span class="p">,</span> <span class="nb">bool</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;&#39;regex&#39; of </span><span class="si">%s</span><span class="s2"> type is not supported&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">regex</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">regex</span> <span class="ow">is</span> <span class="kc">True</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="n">to_replace</span><span class="p">,</span> <span class="nb">str</span>
<span class="p">),</span> <span class="s2">&quot;If &#39;regex&#39; is True then &#39;to_replace&#39; must be a string&quot;</span>
<span class="k">if</span> <span class="n">to_replace</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="s2">&quot;ffill&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="nb">list</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">,</span> <span class="nb">dict</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;&#39;to_replace&#39; should be one of str, list, tuple, dict, int, float&quot;</span><span class="p">)</span>
<span class="n">to_replace</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="k">else</span> <span class="n">to_replace</span>
<span class="n">value</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="k">else</span> <span class="n">value</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">list</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">value</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;Replacement lists must match in length. Expecting </span><span class="si">{}</span><span class="s2"> got </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="n">to_replace</span> <span class="o">=</span> <span class="p">{</span><span class="n">k</span><span class="p">:</span> <span class="n">v</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="n">value</span><span class="p">)}</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="n">is_start</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">current</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">for</span> <span class="n">to_replace_</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">to_replace</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="n">cond</span> <span class="o">=</span> <span class="p">(</span>
<span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">isnan</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span> <span class="o">|</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">())</span>
<span class="k">if</span> <span class="n">pd</span><span class="o">.</span><span class="n">isna</span><span class="p">(</span><span class="n">to_replace_</span><span class="p">)</span>
<span class="k">else</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">to_replace_</span><span class="p">))</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">is_start</span><span class="p">:</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
<span class="n">is_start</span> <span class="o">=</span> <span class="kc">False</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">current</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">current</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">regex</span><span class="p">:</span>
<span class="c1"># to_replace must be a string</span>
<span class="n">cond</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">rlike</span><span class="p">(</span><span class="n">cast</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="n">to_replace</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">cond</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isin</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span>
<span class="c1"># to_replace may be a scalar</span>
<span class="k">if</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">isna</span><span class="p">(</span><span class="n">to_replace</span><span class="p">))</span><span class="o">.</span><span class="n">any</span><span class="p">():</span>
<span class="n">cond</span> <span class="o">=</span> <span class="n">cond</span> <span class="o">|</span> <span class="n">F</span><span class="o">.</span><span class="n">isnan</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span> <span class="o">|</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">()</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">current</span><span class="p">)</span> <span class="c1"># TODO: dtype?</span></div>
<!--
  Highlighted source listing for the Python method Series.update(self, other),
  which is annotated as returning None.
  The [docs] link at the start of the block points back to the Series.update
  entry of the API reference pages.
  Flow of the listed code:
    1. Raise TypeError unless `other` is a Series.
    2. If same_anchor(self, other) holds, build the new Spark column as
       F.when(other is not null, other).otherwise(self), aliased to the existing
       Spark column name, and store it with _update_internal_frame.
    3. Otherwise call combine_frames(..., how="leftouter"), apply the same
       when/otherwise selection to the "this" and "that" columns, and store the
       result with _update_internal_frame(..., check_same_anchor=False).
  NOTE(review): this block is presumably rendered inside a <pre> (typical for
  Sphinx viewcode output, not visible from here), so comments are kept inline
  to avoid adding text nodes to the listing.
--><div class="viewcode-block" id="Series.update"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.update.html#pyspark.pandas.Series.update">[docs]</a> <span class="k">def</span> <span class="nf">update</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Modify Series in place using non-NA values from passed Series. Aligns on index.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.pandas.config import set_option, reset_option</span>
<span class="sd"> &gt;&gt;&gt; set_option(&quot;compute.ops_on_diff_frames&quot;, True)</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s.update(ps.Series([4, 5, 6]))</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 5</span>
<span class="sd"> 2 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;a&#39;, &#39;b&#39;, &#39;c&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s.update(ps.Series([&#39;d&#39;, &#39;e&#39;], index=[0, 2]))</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 0 d</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 e</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s.update(ps.Series([4, 5, 6, 7, 8]))</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 5</span>
<span class="sd"> 2 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3], index=[10, 11, 12])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 10 1</span>
<span class="sd"> 11 2</span>
<span class="sd"> 12 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.update(ps.Series([4, 5, 6]))</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 10 1</span>
<span class="sd"> 11 2</span>
<span class="sd"> 12 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.update(ps.Series([4, 5, 6], index=[11, 12, 13]))</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 10 1</span>
<span class="sd"> 11 4</span>
<span class="sd"> 12 5</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> If ``other`` contains NaNs the corresponding values are not updated</span>
<span class="sd"> in the original Series.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s.update(ps.Series([4, np.nan, 6]))</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 0 4.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 6.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; reset_option(&quot;compute.ops_on_diff_frames&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<!-- type guard: anything other than a Series raises TypeError --><span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Series</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;&#39;other&#39; must be a Series&quot;</span><span class="p">)</span>
<!-- same-anchor path: columns are read directly from self and other --><span class="k">if</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="n">scol</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">(),</span> <span class="n">other</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_name_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">))</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_spark_column</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">,</span> <span class="n">scol</span> <span class="c1"># TODO: dtype?</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_update_internal_frame</span><span class="p">(</span><span class="n">internal</span><span class="p">)</span>
<!-- different-anchor path: frames go through combine_frames(how="leftouter") before the selection --><span class="k">else</span><span class="p">:</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">combine_frames</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">_psdf</span><span class="p">,</span> <span class="n">how</span><span class="o">=</span><span class="s2">&quot;leftouter&quot;</span><span class="p">)</span>
<span class="n">this_scol</span> <span class="o">=</span> <span class="n">combined</span><span class="p">[</span><span class="s2">&quot;this&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">that_scol</span> <span class="o">=</span> <span class="n">combined</span><span class="p">[</span><span class="s2">&quot;that&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_for</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">that_scol</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">(),</span> <span class="n">that_scol</span><span class="p">)</span>
<span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">this_scol</span><span class="p">)</span>
<span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_name_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">))</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">combined</span><span class="p">[</span><span class="s2">&quot;this&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_spark_column</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">,</span> <span class="n">scol</span> <span class="c1"># TODO: dtype?</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_update_internal_frame</span><span class="p">(</span><span class="n">internal</span><span class="o">.</span><span class="n">resolved_copy</span><span class="p">,</span> <span class="n">check_same_anchor</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.where"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.where.html#pyspark.pandas.Series.where">[docs]</a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cond</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Replace values where the condition is False.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> cond : boolean Series</span>
<span class="sd"> Where cond is True, keep the original value. Where False,</span>
<span class="sd"> replace with corresponding value from other.</span>
<span class="sd"> other : scalar, Series</span>
<span class="sd"> Entries where cond is False are replaced with corresponding value from other.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.pandas.config import set_option, reset_option</span>
<span class="sd"> &gt;&gt;&gt; set_option(&quot;compute.ops_on_diff_frames&quot;, True)</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([0, 1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([100, 200, 300, 400, 500])</span>
<span class="sd"> &gt;&gt;&gt; s1.where(s1 &gt; 0).sort_index()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 3.0</span>
<span class="sd"> 4 4.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s1.where(s1 &gt; 1, 10).sort_index()</span>
<span class="sd"> 0 10</span>
<span class="sd"> 1 10</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> 4 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s1.where(s1 &gt; 1, s1 + 100).sort_index()</span>
<span class="sd"> 0 100</span>
<span class="sd"> 1 101</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> 4 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s1.where(s1 &gt; 1, s2).sort_index()</span>
<span class="sd"> 0 100</span>
<span class="sd"> 1 200</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> 4 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; reset_option(&quot;compute.ops_on_diff_frames&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="n">Series</span><span class="p">)</span>
<span class="c1"># We should check the DataFrame from both `cond` and `other`.</span>
<span class="n">should_try_ops_on_diff_frame</span> <span class="o">=</span> <span class="ow">not</span> <span class="n">same_anchor</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span>
<span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Series</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">same_anchor</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">should_try_ops_on_diff_frame</span><span class="p">:</span>
<span class="c1"># Try to perform it with the &#39;compute.ops_on_diff_frames&#39; option.</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span>
<span class="n">tmp_cond_col</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">psdf</span><span class="p">,</span> <span class="s2">&quot;__tmp_cond_col__&quot;</span><span class="p">)</span>
<span class="n">tmp_other_col</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">psdf</span><span class="p">,</span> <span class="s2">&quot;__tmp_other_col__&quot;</span><span class="p">)</span>
<span class="n">psdf</span><span class="p">[</span><span class="n">tmp_cond_col</span><span class="p">]</span> <span class="o">=</span> <span class="n">cond</span>
<span class="n">psdf</span><span class="p">[</span><span class="n">tmp_other_col</span><span class="p">]</span> <span class="o">=</span> <span class="n">other</span>
<span class="c1"># The logic above produces a Spark DataFrame that looks like this:</span>
<span class="c1"># +-----------------+---+----------------+-----------------+</span>
<span class="c1"># |__index_level_0__| 0|__tmp_cond_col__|__tmp_other_col__|</span>
<span class="c1"># +-----------------+---+----------------+-----------------+</span>
<span class="c1"># | 0| 0| false| 100|</span>
<span class="c1"># | 1| 1| false| 200|</span>
<span class="c1"># | 3| 3| true| 400|</span>
<span class="c1"># | 2| 2| true| 300|</span>
<span class="c1"># | 4| 4| true| 500|</span>
<span class="c1"># +-----------------+---+----------------+-----------------+</span>
<span class="n">condition</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">psdf</span><span class="p">[</span><span class="n">tmp_cond_col</span><span class="p">]</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span>
<span class="p">)</span>
<span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">psdf</span><span class="p">[</span><span class="n">tmp_other_col</span><span class="p">]</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_columns</span><span class="p">(</span>
<span class="p">[</span><span class="n">condition</span><span class="p">],</span> <span class="n">column_labels</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Series</span><span class="p">):</span>
<span class="n">other</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">condition</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">condition</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.mask"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.mask.html#pyspark.pandas.Series.mask">[docs]</a> <span class="k">def</span> <span class="nf">mask</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cond</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Replace values where the condition is True.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> cond : boolean Series</span>
<span class="sd"> Where cond is False, keep the original value. Where True,</span>
<span class="sd"> replace with corresponding value from other.</span>
<span class="sd"> other : scalar, Series</span>
<span class="sd"> Entries where cond is True are replaced with corresponding value from other.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.pandas.config import set_option, reset_option</span>
<span class="sd"> &gt;&gt;&gt; set_option(&quot;compute.ops_on_diff_frames&quot;, True)</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([0, 1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([100, 200, 300, 400, 500])</span>
<span class="sd"> &gt;&gt;&gt; s1.mask(s1 &gt; 0).sort_index()</span>
<span class="sd"> 0 0.0</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s1.mask(s1 &gt; 1, 10).sort_index()</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 10</span>
<span class="sd"> 3 10</span>
<span class="sd"> 4 10</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s1.mask(s1 &gt; 1, s1 + 100).sort_index()</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 102</span>
<span class="sd"> 3 103</span>
<span class="sd"> 4 104</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s1.mask(s1 &gt; 1, s2).sort_index()</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 300</span>
<span class="sd"> 3 400</span>
<span class="sd"> 4 500</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; reset_option(&quot;compute.ops_on_diff_frames&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="o">~</span><span class="n">cond</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.xs"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.xs.html#pyspark.pandas.Series.xs">[docs]</a> <span class="k">def</span> <span class="nf">xs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">Name</span><span class="p">,</span> <span class="n">level</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return cross-section from the Series.</span>
<span class="sd"> This method takes a `key` argument to select data at a particular</span>
<span class="sd"> level of a MultiIndex.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> key : label or tuple of label</span>
<span class="sd"> Label contained in the index, or partially in a MultiIndex.</span>
<span class="sd"> level : object, defaults to first n levels (n=1 or len(key))</span>
<span class="sd"> In case of a key partially contained in a MultiIndex, indicate</span>
<span class="sd"> which levels are used. Levels can be referred by label or position.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Cross-section from the original Series</span>
<span class="sd"> corresponding to the selected index levels.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; midx = pd.MultiIndex([[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;],</span>
<span class="sd"> ... [&#39;lama&#39;, &#39;cow&#39;, &#39;falcon&#39;],</span>
<span class="sd"> ... [&#39;speed&#39;, &#39;weight&#39;, &#39;length&#39;]],</span>
<span class="sd"> ... [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd"> ... [0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd"> ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],</span>
<span class="sd"> ... index=midx)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> a lama speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> b cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> c falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Get values at specified index</span>
<span class="sd"> &gt;&gt;&gt; s.xs(&#39;a&#39;)</span>
<span class="sd"> lama speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Get values at several indexes</span>
<span class="sd"> &gt;&gt;&gt; s.xs((&#39;a&#39;, &#39;lama&#39;))</span>
<span class="sd"> speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Get values at specified index and level</span>
<span class="sd"> &gt;&gt;&gt; s.xs(&#39;lama&#39;, level=1)</span>
<span class="sd"> a speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">):</span>
<span class="n">key</span> <span class="o">=</span> <span class="p">(</span><span class="n">key</span><span class="p">,)</span>
<span class="k">if</span> <span class="n">level</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">level</span> <span class="o">=</span> <span class="mi">0</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span>
<span class="n">scols</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">[:</span><span class="n">level</span><span class="p">]</span>
<span class="o">+</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">[</span><span class="n">level</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="p">:]</span>
<span class="o">+</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">]</span>
<span class="p">)</span>
<span class="n">rows</span> <span class="o">=</span> <span class="p">[</span><span class="n">internal</span><span class="o">.</span><span class="n">spark_columns</span><span class="p">[</span><span class="n">lvl</span><span class="p">]</span> <span class="o">==</span> <span class="n">index</span> <span class="k">for</span> <span class="n">lvl</span><span class="p">,</span> <span class="n">index</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">level</span><span class="p">)]</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">rows</span><span class="p">))</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">scols</span><span class="p">)</span>
<span class="k">if</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_level</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">key</span><span class="p">):</span>
<span class="c1"># if the key covers every index level and exactly one row matches, return the scalar value instead of a Series</span>
<span class="n">pdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">toPandas</span><span class="p">()</span>
<span class="n">length</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">pdf</span><span class="p">)</span>
<span class="k">if</span> <span class="n">length</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">return</span> <span class="n">pdf</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">index_spark_column_names</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">internal</span><span class="o">.</span><span class="n">index_spark_column_names</span><span class="p">[:</span><span class="n">level</span><span class="p">]</span>
<span class="o">+</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_column_names</span><span class="p">[</span><span class="n">level</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="p">:]</span>
<span class="p">)</span>
<span class="n">index_names</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_names</span><span class="p">[:</span><span class="n">level</span><span class="p">]</span> <span class="o">+</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_names</span><span class="p">[</span><span class="n">level</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="p">:]</span>
<span class="n">index_fields</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">[:</span><span class="n">level</span><span class="p">]</span> <span class="o">+</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">[</span><span class="n">level</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="p">:]</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">index_spark_column_names</span><span class="p">],</span>
<span class="n">index_names</span><span class="o">=</span><span class="n">index_names</span><span class="p">,</span>
<span class="n">index_fields</span><span class="o">=</span><span class="n">index_fields</span><span class="p">,</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span></div>
<div class="viewcode-block" id="Series.pct_change"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.pct_change.html#pyspark.pandas.Series.pct_change">[docs]</a> <span class="k">def</span> <span class="nf">pct_change</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">periods</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Percentage change between the current and a prior element.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to moving all data into</span>
<span class="sd"> a single partition in a single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method with very large datasets.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> periods : int, default 1</span>
<span class="sd"> Periods to shift for forming percent change.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([90, 91, 85], index=[2, 4, 1])</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 2 90</span>
<span class="sd"> 4 91</span>
<span class="sd"> 1 85</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; psser.pct_change()</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> 4 0.011111</span>
<span class="sd"> 1 -0.065934</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; psser.sort_index().pct_change()</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 0.058824</span>
<span class="sd"> 4 0.011111</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; psser.pct_change(periods=2)</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> 4 NaN</span>
<span class="sd"> 1 -0.055556</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="o">-</span><span class="n">periods</span><span class="p">,</span> <span class="o">-</span><span class="n">periods</span><span class="p">)</span>
<span class="n">prev_row</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lag</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">periods</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">((</span><span class="n">scol</span> <span class="o">-</span> <span class="n">prev_row</span><span class="p">)</span> <span class="o">/</span> <span class="n">prev_row</span><span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">analyzed</span></div>
<div class="viewcode-block" id="Series.combine_first"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.combine_first.html#pyspark.pandas.Series.combine_first">[docs]</a> <span class="k">def</span> <span class="nf">combine_first</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Combine Series values, choosing the calling Series&#39;s values first.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series</span>
<span class="sd"> The value(s) to be combined with the `Series`.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> The result of combining the Series with the other object.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.combine : Perform element-wise operation on two Series</span>
<span class="sd"> using a given function.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Result index will be the union of the two indexes.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([1, np.nan])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([3, 4])</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.ops_on_diff_frames&quot;, True):</span>
<span class="sd"> ... s1.combine_first(s2)</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;`combine_first` only allows `Series` for parameter `other`&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="n">this</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">that</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">combined</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">combine_frames</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">_psdf</span><span class="p">)</span>
<span class="n">this</span> <span class="o">=</span> <span class="n">combined</span><span class="p">[</span><span class="s2">&quot;this&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">that</span> <span class="o">=</span> <span class="n">combined</span><span class="p">[</span><span class="s2">&quot;that&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_for</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="c1"># If `self` has missing value, use value of `other`</span>
<span class="n">cond</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">this</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">that</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">this</span><span class="p">)</span>
<span class="c1"># If `self` and `other` come from same frame, the anchor should be kept</span>
<span class="k">if</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">cond</span><span class="p">)</span> <span class="c1"># TODO: dtype?</span>
<span class="n">index_scols</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="o">*</span><span class="n">index_scols</span><span class="p">,</span> <span class="n">cond</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="p">)</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_sdf</span><span class="p">(</span>
<span class="n">sdf</span><span class="p">,</span> <span class="n">index_fields</span><span class="o">=</span><span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">,</span> <span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">]</span> <span class="c1"># TODO: dtype?</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span></div>
<div class="viewcode-block" id="Series.dot"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.dot.html#pyspark.pandas.Series.dot">[docs]</a> <span class="k">def</span> <span class="nf">dot</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compute the dot product between the Series and the columns of other.</span>
<span class="sd"> This method computes the dot product between the Series and another</span>
<span class="sd"> one, or the Series and each column of a DataFrame.</span>
<span class="sd"> It can also be called using `self @ other` in Python &gt;= 3.5.</span>
<span class="sd"> .. note:: This API is slightly different from pandas when indexes from both Series</span>
<span class="sd"> are not aligned and config &#39;compute.eager_check&#39; is False. pandas raises an exception;</span>
<span class="sd"> however, pandas-on-Spark just proceeds and performs by ignoring mismatches with NaN</span>
<span class="sd"> permissively.</span>
<span class="sd"> &gt;&gt;&gt; pdf1 = pd.Series([1, 2, 3], index=[0, 1, 2])</span>
<span class="sd"> &gt;&gt;&gt; pdf2 = pd.Series([1, 2, 3], index=[0, 1, 3])</span>
<span class="sd"> &gt;&gt;&gt; pdf1.dot(pdf2) # doctest: +SKIP</span>
<span class="sd"> ...</span>
<span class="sd"> ValueError: matrices are not aligned</span>
<span class="sd"> &gt;&gt;&gt; psdf1 = ps.Series([1, 2, 3], index=[0, 1, 2])</span>
<span class="sd"> &gt;&gt;&gt; psdf2 = ps.Series([1, 2, 3], index=[0, 1, 3])</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.eager_check&quot;, False):</span>
<span class="sd"> ... psdf1.dot(psdf2) # doctest: +SKIP</span>
<span class="sd"> ...</span>
<span class="sd"> 5</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series, DataFrame.</span>
<span class="sd"> The other object to compute the dot product with its columns.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> scalar, Series</span>
<span class="sd"> Return the dot product of the Series and other if other is a</span>
<span class="sd"> Series, the Series of the dot product of Series and each column of</span>
<span class="sd"> other if other is a DataFrame.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> The Series and other must share the same index if other is a Series</span>
<span class="sd"> or a DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([0, 1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s.dot(s)</span>
<span class="sd"> 14</span>
<span class="sd"> &gt;&gt;&gt; s @ s</span>
<span class="sd"> 14</span>
<span class="sd"> &gt;&gt;&gt; psdf = ps.DataFrame({&#39;x&#39;: [0, 1, 2, 3], &#39;y&#39;: [0, -1, -2, -3]})</span>
<span class="sd"> &gt;&gt;&gt; psdf</span>
<span class="sd"> x y</span>
<span class="sd"> 0 0 0</span>
<span class="sd"> 1 1 -1</span>
<span class="sd"> 2 2 -2</span>
<span class="sd"> 3 3 -3</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.ops_on_diff_frames&quot;, True):</span>
<span class="sd"> ... s.dot(psdf)</span>
<span class="sd"> ...</span>
<span class="sd"> x 14</span>
<span class="sd"> y -14</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="k">if</span> <span class="n">get_option</span><span class="p">(</span><span class="s2">&quot;compute.eager_check&quot;</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">cast</span><span class="p">(</span>
<span class="n">ps</span><span class="o">.</span><span class="n">Index</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">sort_values</span><span class="p">()</span>
<span class="p">)</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">cast</span><span class="p">(</span><span class="n">ps</span><span class="o">.</span><span class="n">Index</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">sort_values</span><span class="p">())):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;matrices are not aligned&quot;</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">len</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">index</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;matrices are not aligned&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">):</span>
<span class="n">other_copy</span><span class="p">:</span> <span class="n">DataFrame</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">column_labels</span> <span class="o">=</span> <span class="n">other_copy</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span>
<span class="n">self_column_label</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">other_copy</span><span class="p">,</span> <span class="s2">&quot;__self_column__&quot;</span><span class="p">)</span>
<span class="n">other_copy</span><span class="p">[</span><span class="n">self_column_label</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span>
<span class="n">self_psser</span> <span class="o">=</span> <span class="n">other_copy</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">self_column_label</span><span class="p">)</span>
<span class="n">product_pssers</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">cast</span><span class="p">(</span><span class="n">Series</span><span class="p">,</span> <span class="n">other_copy</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">label</span><span class="p">)</span> <span class="o">*</span> <span class="n">self_psser</span><span class="p">)</span> <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">column_labels</span>
<span class="p">]</span>
<span class="n">dot_product_psser</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span>
<span class="n">other_copy</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_columns</span><span class="p">(</span><span class="n">product_pssers</span><span class="p">,</span> <span class="n">column_labels</span><span class="o">=</span><span class="n">column_labels</span><span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">Series</span><span class="p">,</span> <span class="n">dot_product_psser</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Series</span><span class="p">)</span>
<span class="k">return</span> <span class="p">(</span><span class="bp">self</span> <span class="o">*</span> <span class="n">other</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span></div>
<span class="k">def</span> <span class="fm">__matmul__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Matrix multiplication using binary `@` operator in Python&gt;=3.5.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<div class="viewcode-block" id="Series.repeat"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.repeat.html#pyspark.pandas.Series.repeat">[docs]</a> <span class="k">def</span> <span class="nf">repeat</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">repeats</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Repeat elements of a Series.</span>
<span class="sd"> Returns a new Series where each element of the current Series</span>
<span class="sd"> is repeated consecutively a given number of times.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> repeats : int or Series</span>
<span class="sd"> The number of repetitions for each element. This should be a</span>
<span class="sd"> non-negative integer. Repeating 0 times will return an empty</span>
<span class="sd"> Series.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Newly created Series with repeated elements.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Index.repeat : Equivalent function for Index.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;a&#39;, &#39;b&#39;, &#39;c&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 a</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 c</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; s.repeat(2)</span>
<span class="sd"> 0 a</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 c</span>
<span class="sd"> 0 a</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 c</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; ps.Series([1, 2, 3]).repeat(0)</span>
<span class="sd"> Series([], dtype: int64)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">repeats</span><span class="p">,</span> <span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">Series</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;`repeats` argument must be integer or Series, but got </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">repeats</span><span class="p">))</span>
<span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">repeats</span><span class="p">,</span> <span class="n">Series</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">repeats</span><span class="p">):</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span>
<span class="n">temp_repeats</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">psdf</span><span class="p">,</span> <span class="s2">&quot;__temp_repeats__&quot;</span><span class="p">)</span>
<span class="n">psdf</span><span class="p">[</span><span class="n">temp_repeats</span><span class="p">]</span> <span class="o">=</span> <span class="n">repeats</span>
<span class="k">return</span> <span class="p">(</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="n">psdf</span><span class="p">[</span><span class="n">temp_repeats</span><span class="p">])</span>
<span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">explode</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">array_repeat</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="n">repeats</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s2">&quot;int32&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name_like_string</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">))</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span> <span class="o">+</span> <span class="p">[</span><span class="n">scol</span><span class="p">])</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span>
<span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_column_names</span>
<span class="p">],</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">name_like_string</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">))],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">repeats</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;negative dimensions are not allowed&quot;</span><span class="p">)</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]]</span>
<span class="k">if</span> <span class="n">repeats</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_filter</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">False</span><span class="p">))))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">cast</span><span class="p">(</span><span class="s2">&quot;ps.DataFrame&quot;</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">concat</span><span class="p">([</span><span class="n">psdf</span><span class="p">]</span> <span class="o">*</span> <span class="n">repeats</span><span class="p">)))</span></div>
<div class="viewcode-block" id="Series.asof"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.asof.html#pyspark.pandas.Series.asof">[docs]</a> <span class="k">def</span> <span class="nf">asof</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">where</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Any</span><span class="p">,</span> <span class="n">List</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the last row(s) without any NaNs before `where`.</span>
<span class="sd"> The last row (for each element in `where`, if list) without any</span>
<span class="sd"> NaN is taken.</span>
<span class="sd"> If there is no good value, NaN is returned.</span>
<span class="sd"> .. note:: This API is dependent on :meth:`Index.is_monotonic_increasing`</span>
<span class="sd"> which is expensive.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> where : index or array-like of indices</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> scalar or Series</span>
<span class="sd"> The return can be:</span>
<span class="sd"> * scalar : when `self` is a Series and `where` is a scalar</span>
<span class="sd"> * Series: when `self` is a Series and `where` is an array-like</span>
<span class="sd"> Return scalar or Series</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Indices are assumed to be sorted. Raises if this is not the case and config</span>
<span class="sd"> &#39;compute.eager_check&#39; is True. If &#39;compute.eager_check&#39; is False pandas-on-Spark just</span>
<span class="sd"> proceeds, ignoring the order of the indices.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 10 1.0</span>
<span class="sd"> 20 2.0</span>
<span class="sd"> 30 NaN</span>
<span class="sd"> 40 4.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> A scalar `where`.</span>
<span class="sd"> &gt;&gt;&gt; s.asof(20)</span>
<span class="sd"> 2.0</span>
<span class="sd"> For a sequence `where`, a Series is returned. The first value is</span>
<span class="sd"> NaN, because the first element of `where` is before the first</span>
<span class="sd"> index value.</span>
<span class="sd"> &gt;&gt;&gt; s.asof([5, 20]).sort_index()</span>
<span class="sd"> 5 NaN</span>
<span class="sd"> 20 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Missing values are not considered. The following is ``2.0``, not</span>
<span class="sd"> NaN, even though NaN is at the index location for ``30``.</span>
<span class="sd"> &gt;&gt;&gt; s.asof(30)</span>
<span class="sd"> 2.0</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, np.nan, 4], index=[10, 30, 20, 40])</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.eager_check&quot;, False):</span>
<span class="sd"> ... s.asof(20)</span>
<span class="sd"> ...</span>
<span class="sd"> 1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">should_return_series</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">MultiIndex</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;asof is not supported for a MultiIndex&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">where</span><span class="p">,</span> <span class="p">(</span><span class="n">ps</span><span class="o">.</span><span class="n">Index</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;where cannot be an Index, Series or a DataFrame&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">get_option</span><span class="p">(</span><span class="s2">&quot;compute.eager_check&quot;</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">is_monotonic_increasing</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;asof requires a sorted index&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_list_like</span><span class="p">(</span><span class="n">where</span><span class="p">):</span>
<span class="n">should_return_series</span> <span class="o">=</span> <span class="kc">False</span>
<span class="n">where</span> <span class="o">=</span> <span class="p">[</span><span class="n">where</span><span class="p">]</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span>
<span class="n">index_scol</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">index_type</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_type_for</span><span class="p">(</span><span class="n">index_scol</span><span class="p">)</span>
<span class="n">spark_column</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_spark_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">monotonically_increasing_id_column</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span>
<span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="p">,</span> <span class="s2">&quot;__monotonically_increasing_id__&quot;</span>
<span class="p">)</span>
<span class="n">cond</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">F</span><span class="o">.</span><span class="n">max_by</span><span class="p">(</span>
<span class="n">spark_column</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="p">(</span><span class="n">index_scol</span> <span class="o">&lt;=</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">index</span><span class="p">)</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">index_type</span><span class="p">))</span> <span class="o">&amp;</span> <span class="n">spark_column</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">()</span>
<span class="k">if</span> <span class="n">pd</span><span class="o">.</span><span class="n">notna</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
<span class="c1"># If index is nan and the value of the col is not null</span>
<span class="c1"># then return monotonically_increasing_id. This lets max_by</span>
<span class="c1"># return the last index value, which is the behaviour of pandas</span>
<span class="k">else</span> <span class="n">spark_column</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">(),</span>
<span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">monotonically_increasing_id_column</span><span class="p">),</span>
<span class="p">),</span>
<span class="p">)</span>
<span class="k">for</span> <span class="n">index</span> <span class="ow">in</span> <span class="n">where</span>
<span class="p">]</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">withColumn</span><span class="p">(</span>
<span class="n">monotonically_increasing_id_column</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">monotonically_increasing_id</span><span class="p">()</span>
<span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">cond</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">should_return_series</span><span class="p">:</span>
<span class="k">with</span> <span class="n">sql_conf</span><span class="p">({</span><span class="n">SPARK_CONF_ARROW_ENABLED</span><span class="p">:</span> <span class="kc">False</span><span class="p">}):</span>
<span class="c1"># Disable Arrow to keep row ordering.</span>
<span class="n">result</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">toPandas</span><span class="p">()</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span>
<span class="k">return</span> <span class="n">result</span> <span class="k">if</span> <span class="n">result</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="c1"># The data is expected to be small so it&#39;s fine to transpose/use default index.</span>
<span class="k">with</span> <span class="n">ps</span><span class="o">.</span><span class="n">option_context</span><span class="p">(</span><span class="s2">&quot;compute.default_index_type&quot;</span><span class="p">,</span> <span class="s2">&quot;distributed&quot;</span><span class="p">,</span> <span class="s2">&quot;compute.max_rows&quot;</span><span class="p">,</span> <span class="mi">1</span><span class="p">):</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">where</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="n">where</span><span class="p">))</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">index_type</span><span class="p">,</span> <span class="n">TimestampType</span><span class="p">):</span>
<span class="n">psdf</span><span class="p">:</span> <span class="n">DataFrame</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">sdf</span><span class="p">)</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Index</span><span class="p">(</span><span class="n">where</span><span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="o">.</span><span class="n">transpose</span><span class="p">())</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># If `where` has duplicate items, leverage the pandas directly</span>
<span class="c1"># since pandas API on Spark doesn&#39;t support the duplicate column name.</span>
<span class="n">pdf</span><span class="p">:</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">toPandas</span><span class="p">()</span>
<span class="n">pdf</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Index</span><span class="p">(</span><span class="n">where</span><span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">pdf</span><span class="o">.</span><span class="n">transpose</span><span class="p">()))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.unstack"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.unstack.html#pyspark.pandas.Series.unstack">[docs]</a> <span class="k">def</span> <span class="nf">unstack</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">level</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Unstack, a.k.a. pivot, Series with MultiIndex to produce DataFrame.</span>
<span class="sd"> The level involved will automatically get sorted.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Unlike pandas, pandas-on-Spark doesn&#39;t check whether an index is duplicated or not</span>
<span class="sd"> because the checking of duplicated index requires scanning whole data which</span>
<span class="sd"> can be quite expensive.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> level : int, default -1 (last level)</span>
<span class="sd"> Position of the index level to unstack; negative values count from the last level.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> DataFrame</span>
<span class="sd"> Unstacked Series.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4],</span>
<span class="sd"> ... index=pd.MultiIndex.from_product([[&#39;one&#39;, &#39;two&#39;],</span>
<span class="sd"> ... [&#39;a&#39;, &#39;b&#39;]]))</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> one a 1</span>
<span class="sd"> b 2</span>
<span class="sd"> two a 3</span>
<span class="sd"> b 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.unstack(level=-1).sort_index()</span>
<span class="sd"> a b</span>
<span class="sd"> one 1 2</span>
<span class="sd"> two 3 4</span>
<span class="sd"> &gt;&gt;&gt; s.unstack(level=0).sort_index()</span>
<span class="sd"> one two</span>
<span class="sd"> a 1 3</span>
<span class="sd"> b 2 4</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">MultiIndex</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Series.unstack only support for a MultiIndex&quot;</span><span class="p">)</span>
<span class="n">index_nlevels</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">nlevels</span>
<span class="k">if</span> <span class="n">level</span> <span class="o">&gt;</span> <span class="mi">0</span> <span class="ow">and</span> <span class="p">(</span><span class="n">level</span> <span class="o">&gt;</span> <span class="n">index_nlevels</span> <span class="o">-</span> <span class="mi">1</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">IndexError</span><span class="p">(</span>
<span class="s2">&quot;Too many levels: Index has only </span><span class="si">{}</span><span class="s2"> levels, not </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">index_nlevels</span><span class="p">,</span> <span class="n">level</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">level</span> <span class="o">&lt;</span> <span class="mi">0</span> <span class="ow">and</span> <span class="p">(</span><span class="n">level</span> <span class="o">&lt;</span> <span class="o">-</span><span class="n">index_nlevels</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">IndexError</span><span class="p">(</span>
<span class="s2">&quot;Too many levels: Index has only </span><span class="si">{}</span><span class="s2"> levels, </span><span class="si">{}</span><span class="s2"> is not a valid level number&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">index_nlevels</span><span class="p">,</span> <span class="n">level</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span>
<span class="n">index_map</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">zip</span><span class="p">(</span><span class="n">internal</span><span class="o">.</span><span class="n">index_spark_column_names</span><span class="p">,</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_names</span><span class="p">,</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">pivot_col</span><span class="p">,</span> <span class="n">column_label_names</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">index_map</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">level</span><span class="p">)</span>
<span class="n">index_scol_names</span><span class="p">,</span> <span class="n">index_names</span><span class="p">,</span> <span class="n">index_fields</span> <span class="o">=</span> <span class="nb">zip</span><span class="p">(</span><span class="o">*</span><span class="n">index_map</span><span class="p">)</span>
<span class="n">col</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">index_scol_names</span><span class="p">))</span><span class="o">.</span><span class="n">pivot</span><span class="p">(</span><span class="n">pivot_col</span><span class="p">)</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">first</span><span class="p">(</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)))</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">index_scol_names</span><span class="p">],</span>
<span class="n">index_names</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="n">index_names</span><span class="p">),</span>
<span class="n">index_fields</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="n">index_fields</span><span class="p">),</span>
<span class="n">column_label_names</span><span class="o">=</span><span class="p">[</span><span class="n">column_label_names</span><span class="p">],</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span>
<span class="n">field</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
<span class="k">for</span> <span class="n">field</span> <span class="ow">in</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_fields</span>
<span class="p">]</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.item"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.item.html#pyspark.pandas.Series.item">[docs]</a> <span class="k">def</span> <span class="nf">item</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Scalar</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the first element of the underlying data as a Python scalar.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> scalar</span>
<span class="sd"> The first element of Series.</span>
<span class="sd"> Raises</span>
<span class="sd"> ------</span>
<span class="sd"> ValueError</span>
<span class="sd"> If the data is not length-1.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([10])</span>
<span class="sd"> &gt;&gt;&gt; psser.item()</span>
<span class="sd"> 10</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">()</span></div>
<div class="viewcode-block" id="Series.items"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.items.html#pyspark.pandas.Series.items">[docs]</a> <span class="k">def</span> <span class="nf">items</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Lazily iterate over (index, value) tuples.</span>
<span class="sd"> This method returns an iterable tuple (index, value). This is</span>
<span class="sd"> convenient if you want to create a lazy iterator.</span>
<span class="sd"> .. note:: Unlike pandas, ``items`` in pandas-on-Spark returns a generator rather</span>
<span class="sd"> than a zip object.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> iterable</span>
<span class="sd"> Iterable of tuples containing the (index, value) pairs from a</span>
<span class="sd"> Series.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> DataFrame.items : Iterate over (column name, Series) pairs.</span>
<span class="sd"> DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;A&#39;, &#39;B&#39;, &#39;C&#39;])</span>
<span class="sd"> &gt;&gt;&gt; for index, value in s.items():</span>
<span class="sd"> ... print(&quot;Index : {}, Value : {}&quot;.format(index, value))</span>
<span class="sd"> Index : 0, Value : A</span>
<span class="sd"> Index : 1, Value : B</span>
<span class="sd"> Index : 2, Value : C</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">internal_index_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_column_names</span>
<span class="n">internal_data_column</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">def</span> <span class="nf">extract_kv_from_spark_row</span><span class="p">(</span><span class="n">row</span><span class="p">:</span> <span class="n">Row</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<span class="n">k</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">row</span><span class="p">[</span><span class="n">internal_index_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">internal_index_columns</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span>
<span class="k">else</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">row</span><span class="p">[</span><span class="n">c</span><span class="p">]</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">internal_index_columns</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">v</span> <span class="o">=</span> <span class="n">row</span><span class="p">[</span><span class="n">internal_data_column</span><span class="p">]</span>
<span class="k">return</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span>
<span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">map</span><span class="p">(</span>
<span class="n">extract_kv_from_spark_row</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">toLocalIterator</span><span class="p">()</span>
<span class="p">):</span>
<span class="k">yield</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span></div>
<div class="viewcode-block" id="Series.droplevel"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.droplevel.html#pyspark.pandas.Series.droplevel">[docs]</a> <span class="k">def</span> <span class="nf">droplevel</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">level</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Name</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Name</span><span class="p">]]])</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return Series with requested index level(s) removed.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> level : int, str, or list-like</span>
<span class="sd"> If a string is given, must be the name of a level</span>
<span class="sd"> If list-like, elements must be names or positional indexes</span>
<span class="sd"> of levels.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with requested index level(s) removed.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series(</span>
<span class="sd"> ... [1, 2, 3],</span>
<span class="sd"> ... index=pd.MultiIndex.from_tuples(</span>
<span class="sd"> ... [(&quot;x&quot;, &quot;a&quot;), (&quot;x&quot;, &quot;b&quot;), (&quot;y&quot;, &quot;c&quot;)], names=[&quot;level_1&quot;, &quot;level_2&quot;]</span>
<span class="sd"> ... ),</span>
<span class="sd"> ... )</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> level_1 level_2</span>
<span class="sd"> x a 1</span>
<span class="sd"> b 2</span>
<span class="sd"> y c 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Removing specific index level by level</span>
<span class="sd"> &gt;&gt;&gt; psser.droplevel(0)</span>
<span class="sd"> level_2</span>
<span class="sd"> a 1</span>
<span class="sd"> b 2</span>
<span class="sd"> c 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Removing specific index level by name</span>
<span class="sd"> &gt;&gt;&gt; psser.droplevel(&quot;level_2&quot;)</span>
<span class="sd"> level_1</span>
<span class="sd"> x 1</span>
<span class="sd"> x 2</span>
<span class="sd"> y 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">droplevel</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="n">level</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.tail"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.tail.html#pyspark.pandas.Series.tail">[docs]</a> <span class="k">def</span> <span class="nf">tail</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the last `n` rows.</span>
<span class="sd"> This function returns last `n` rows from the object based on</span>
<span class="sd"> position. It is useful for quickly verifying data, for example,</span>
<span class="sd"> after sorting or appending rows.</span>
<span class="sd"> For negative values of `n`, this function returns all rows except</span>
<span class="sd"> the first `|n|` rows, equivalent to ``df[|n|:]``.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> n : int, default 5</span>
<span class="sd"> Number of rows to select.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> type of caller</span>
<span class="sd"> The last `n` rows of the caller object.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> DataFrame.head : The first `n` rows of the caller object.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([1, 2, 3, 4, 5])</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> 4 5</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; psser.tail(3) # doctest: +SKIP</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> 4 5</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">tail</span><span class="p">(</span><span class="n">n</span><span class="o">=</span><span class="n">n</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.explode"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.explode.html#pyspark.pandas.Series.explode">[docs]</a> <span class="k">def</span> <span class="nf">explode</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Transform each element of a list-like to a row.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Exploded lists to rows; index will be duplicated for these rows.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.str.split : Split string values on specified separator.</span>
<span class="sd"> Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex</span>
<span class="sd"> to produce DataFrame.</span>
<span class="sd"> DataFrame.melt : Unpivot a DataFrame from wide format to long format.</span>
<span class="sd"> DataFrame.explode : Explode a DataFrame from list-like</span>
<span class="sd"> columns to long format.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([[1, 2, 3], [], [3, 4]])</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 0 [1, 2, 3]</span>
<span class="sd"> 1 []</span>
<span class="sd"> 2 [3, 4]</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; psser.explode() # doctest: +SKIP</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 0 2.0</span>
<span class="sd"> 0 3.0</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 2 4.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">ArrayType</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">explode_outer</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name_like_string</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">))</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_columns</span><span class="p">([</span><span class="n">scol</span><span class="p">],</span> <span class="n">keep_order</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span></div>
<div class="viewcode-block" id="Series.argsort"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.argsort.html#pyspark.pandas.Series.argsort">[docs]</a> <span class="k">def</span> <span class="nf">argsort</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the integer indices that would sort the Series values.</span>
<span class="sd"> Unlike pandas, the index order is not preserved in the result.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Positions of values within the sort order with -1 indicating</span>
<span class="sd"> nan values.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([3, 3, 4, 1, 6, 2, 3, 7, 8, 7, 10])</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 0 3</span>
<span class="sd"> 1 3</span>
<span class="sd"> 2 4</span>
<span class="sd"> 3 1</span>
<span class="sd"> 4 6</span>
<span class="sd"> 5 2</span>
<span class="sd"> 6 3</span>
<span class="sd"> 7 7</span>
<span class="sd"> 8 8</span>
<span class="sd"> 9 7</span>
<span class="sd"> 10 10</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; psser.argsort().sort_index()</span>
<span class="sd"> 0 3</span>
<span class="sd"> 1 5</span>
<span class="sd"> 2 0</span>
<span class="sd"> 3 1</span>
<span class="sd"> 4 6</span>
<span class="sd"> 5 2</span>
<span class="sd"> 6 4</span>
<span class="sd"> 7 7</span>
<span class="sd"> 8 9</span>
<span class="sd"> 9 8</span>
<span class="sd"> 10 10</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;The behavior of Series.argsort in the presence of NA values is deprecated. &quot;</span>
<span class="s2">&quot;In a future version, NA values will be ordered last instead of set to -1.&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">notnull</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">notnull</span><span class="p">()]</span>
<span class="n">sdf_for_index</span> <span class="o">=</span> <span class="n">notnull</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">notnull</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">)</span>
<span class="n">tmp_join_key</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">sdf_for_index</span><span class="p">,</span> <span class="s2">&quot;__tmp_join_key__&quot;</span><span class="p">)</span>
<span class="n">sdf_for_index</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="o">.</span><span class="n">attach_distributed_sequence_column</span><span class="p">(</span>
<span class="n">sdf_for_index</span><span class="p">,</span> <span class="n">tmp_join_key</span>
<span class="p">)</span>
<span class="c1"># sdf_for_index:</span>
<span class="c1"># +----------------+-----------------+</span>
<span class="c1"># |__tmp_join_key__|__index_level_0__|</span>
<span class="c1"># +----------------+-----------------+</span>
<span class="c1"># | 0| 0|</span>
<span class="c1"># | 1| 1|</span>
<span class="c1"># | 2| 2|</span>
<span class="c1"># | 3| 3|</span>
<span class="c1"># | 4| 4|</span>
<span class="c1"># +----------------+-----------------+</span>
<span class="n">sdf_for_data</span> <span class="o">=</span> <span class="n">notnull</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="n">notnull</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;values&quot;</span><span class="p">),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span>
<span class="p">)</span>
<span class="n">sdf_for_data</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="o">.</span><span class="n">attach_distributed_sequence_column</span><span class="p">(</span>
<span class="n">sdf_for_data</span><span class="p">,</span> <span class="n">SPARK_DEFAULT_SERIES_NAME</span>
<span class="p">)</span>
<span class="c1"># sdf_for_data:</span>
<span class="c1"># +---+------+-----------------+</span>
<span class="c1"># | 0|values|__natural_order__|</span>
<span class="c1"># +---+------+-----------------+</span>
<span class="c1"># | 0| 3| 25769803776|</span>
<span class="c1"># | 1| 3| 51539607552|</span>
<span class="c1"># | 2| 4| 77309411328|</span>
<span class="c1"># | 3| 1| 103079215104|</span>
<span class="c1"># | 4| 2| 128849018880|</span>
<span class="c1"># +---+------+-----------------+</span>
<span class="n">sdf_for_data</span> <span class="o">=</span> <span class="n">sdf_for_data</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span>
<span class="n">scol_for</span><span class="p">(</span><span class="n">sdf_for_data</span><span class="p">,</span> <span class="s2">&quot;values&quot;</span><span class="p">),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span>
<span class="p">)</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s2">&quot;values&quot;</span><span class="p">,</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="n">tmp_join_key</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">sdf_for_data</span><span class="p">,</span> <span class="s2">&quot;__tmp_join_key__&quot;</span><span class="p">)</span>
<span class="n">sdf_for_data</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="o">.</span><span class="n">attach_distributed_sequence_column</span><span class="p">(</span><span class="n">sdf_for_data</span><span class="p">,</span> <span class="n">tmp_join_key</span><span class="p">)</span>
<span class="c1"># sdf_for_index: sdf_for_data:</span>
<span class="c1"># +----------------+-----------------+ +----------------+---+</span>
<span class="c1"># |__tmp_join_key__|__index_level_0__| |__tmp_join_key__| 0|</span>
<span class="c1"># +----------------+-----------------+ +----------------+---+</span>
<span class="c1"># | 0| 0| | 0| 3|</span>
<span class="c1"># | 1| 1| | 1| 4|</span>
<span class="c1"># | 2| 2| | 2| 0|</span>
<span class="c1"># | 3| 3| | 3| 1|</span>
<span class="c1"># | 4| 4| | 4| 2|</span>
<span class="c1"># +----------------+-----------------+ +----------------+---+</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf_for_index</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">sdf_for_data</span><span class="p">,</span> <span class="n">on</span><span class="o">=</span><span class="n">tmp_join_key</span><span class="p">)</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">tmp_join_key</span><span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_sdf</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">data_columns</span><span class="o">=</span><span class="p">[</span><span class="n">SPARK_DEFAULT_SERIES_NAME</span><span class="p">],</span>
<span class="n">index_fields</span><span class="o">=</span><span class="p">[</span>
<span class="n">InternalField</span><span class="p">(</span><span class="n">dtype</span><span class="o">=</span><span class="n">field</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="k">for</span> <span class="n">field</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_fields</span>
<span class="p">],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">],</span>
<span class="p">)</span>
<span class="n">psser</span> <span class="o">=</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span>
<span class="n">Series</span><span class="p">,</span>
<span class="n">ps</span><span class="o">.</span><span class="n">concat</span><span class="p">([</span><span class="n">psser</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">isnull</span><span class="p">()]</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="k">lambda</span> <span class="n">_</span><span class="p">:</span> <span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">))]),</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="Series.argmax"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.argmax.html#pyspark.pandas.Series.argmax">[docs]</a> <span class="k">def</span> <span class="nf">argmax</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return int position of the largest value in the Series.</span>
<span class="sd"> If the maximum is achieved in multiple locations,</span>
<span class="sd"> the first row position is returned.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> axis : None</span>
<span class="sd"> Dummy argument for consistency with Series.</span>
<span class="sd"> skipna : bool, default True</span>
<span class="sd"> Exclude NA/null values.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> int</span>
<span class="sd"> Row position of the maximum value.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Consider a dataset containing cereal calories</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series({&#39;Corn Flakes&#39;: 100.0, &#39;Almond Delight&#39;: 110.0, &#39;Unknown&#39;: np.nan,</span>
<span class="sd"> ... &#39;Cinnamon Toast Crunch&#39;: 120.0, &#39;Cocoa Puff&#39;: 110.0})</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> Corn Flakes 100.0</span>
<span class="sd"> Almond Delight 110.0</span>
<span class="sd"> Unknown NaN</span>
<span class="sd"> Cinnamon Toast Crunch 120.0</span>
<span class="sd"> Cocoa Puff 110.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.argmax()</span>
<span class="sd"> 3</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">,</span> <span class="n">none_axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="k">if</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;axis can only be 0 or &#39;index&#39;&quot;</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="n">seq_col_name</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="s2">&quot;__distributed_sequence_column__&quot;</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="o">.</span><span class="n">attach_distributed_sequence_column</span><span class="p">(</span>
<span class="n">sdf</span><span class="p">,</span>
<span class="n">seq_col_name</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="k">if</span> <span class="n">skipna</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">desc_nulls_last</span><span class="p">(),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">,</span> <span class="n">seq_col_name</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">desc_nulls_first</span><span class="p">(),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">,</span> <span class="n">seq_col_name</span><span class="p">)</span>
<span class="n">results</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">seq_col_name</span><span class="p">)</span><span class="o">.</span><span class="n">take</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">results</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;attempt to get argmax of an empty sequence&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">max_value</span> <span class="o">=</span> <span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="c1"># If the maximum is achieved in multiple locations, the first row position is returned.</span>
<span class="k">if</span> <span class="n">max_value</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;The behavior of Series.argmax/argmin &quot;</span>
<span class="s2">&quot;with skipna=False and NAs, or with all-NAs is deprecated. &quot;</span>
<span class="s2">&quot;In a future version this will raise ValueError.&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">return</span> <span class="o">-</span><span class="mi">1</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">max_value</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span></div>
<div class="viewcode-block" id="Series.argmin"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.argmin.html#pyspark.pandas.Series.argmin">[docs]</a> <span class="k">def</span> <span class="nf">argmin</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return int position of the smallest value in the Series.</span>
<span class="sd"> If the minimum is achieved in multiple locations,</span>
<span class="sd"> the first row position is returned.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> axis : None</span>
<span class="sd"> Dummy argument for consistency with Series.</span>
<span class="sd"> skipna : bool, default True</span>
<span class="sd"> Exclude NA/null values.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> int</span>
<span class="sd"> Row position of the minimum value.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Consider a dataset containing cereal calories</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series({&#39;Corn Flakes&#39;: 100.0, &#39;Almond Delight&#39;: 110.0,</span>
<span class="sd"> ... &#39;Cinnamon Toast Crunch&#39;: 120.0, &#39;Cocoa Puff&#39;: 110.0})</span>
<span class="sd"> &gt;&gt;&gt; s # doctest: +SKIP</span>
<span class="sd"> Corn Flakes 100.0</span>
<span class="sd"> Almond Delight 110.0</span>
<span class="sd"> Cinnamon Toast Crunch 120.0</span>
<span class="sd"> Cocoa Puff 110.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.argmin() # doctest: +SKIP</span>
<span class="sd"> 0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">,</span> <span class="n">none_axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="k">if</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;axis can only be 0 or &#39;index&#39;&quot;</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="n">seq_col_name</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="s2">&quot;__distributed_sequence_column__&quot;</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="o">.</span><span class="n">attach_distributed_sequence_column</span><span class="p">(</span>
<span class="n">sdf</span><span class="p">,</span>
<span class="n">seq_col_name</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="k">if</span> <span class="n">skipna</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">asc_nulls_last</span><span class="p">(),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">,</span> <span class="n">seq_col_name</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">asc_nulls_first</span><span class="p">(),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">,</span> <span class="n">seq_col_name</span><span class="p">)</span>
<span class="n">results</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">seq_col_name</span><span class="p">)</span><span class="o">.</span><span class="n">take</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">results</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;attempt to get argmin of an empty sequence&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">min_value</span> <span class="o">=</span> <span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="c1"># If the minimum is achieved in multiple locations, the first row position is returned.</span>
<span class="k">if</span> <span class="n">min_value</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;The behavior of Series.argmax/argmin &quot;</span>
<span class="s2">&quot;with skipna=False and NAs, or with all-NAs is deprecated. &quot;</span>
<span class="s2">&quot;In a future version this will raise ValueError.&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">return</span> <span class="o">-</span><span class="mi">1</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">min_value</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span></div>
<div class="viewcode-block" id="Series.compare"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.compare.html#pyspark.pandas.Series.compare">[docs]</a> <span class="k">def</span> <span class="nf">compare</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">keep_shape</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">keep_equal</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare to another Series and show the differences.</span>
<span class="sd"> .. note:: This API is slightly different from pandas when indexes from both Series</span>
<span class="sd"> are not identical and config &#39;compute.eager_check&#39; is False. pandas raises an exception;</span>
<span class="sd"> however, pandas-on-Spark just proceeds and performs the comparison, ignoring mismatches.</span>
<span class="sd"> &gt;&gt;&gt; psser1 = ps.Series([1, 2, 3, 4, 5], index=pd.Index([1, 2, 3, 4, 5]))</span>
<span class="sd"> &gt;&gt;&gt; psser2 = ps.Series([1, 2, 3, 4, 5], index=pd.Index([1, 2, 4, 3, 6]))</span>
<span class="sd"> &gt;&gt;&gt; psser1.compare(psser2) # doctest: +SKIP</span>
<span class="sd"> ...</span>
<span class="sd"> ValueError: Can only compare identically-labeled Series objects</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.eager_check&quot;, False):</span>
<span class="sd"> ... psser1.compare(psser2) # doctest: +SKIP</span>
<span class="sd"> ...</span>
<span class="sd"> self other</span>
<span class="sd"> 3 3.0 4.0</span>
<span class="sd"> 4 4.0 3.0</span>
<span class="sd"> 5 5.0 NaN</span>
<span class="sd"> 6 NaN 5.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series</span>
<span class="sd"> Object to compare with.</span>
<span class="sd"> keep_shape : bool, default False</span>
<span class="sd"> If true, all rows and columns are kept.</span>
<span class="sd"> Otherwise, only the ones with different values are kept.</span>
<span class="sd"> keep_equal : bool, default False</span>
<span class="sd"> If true, the result keeps values that are equal.</span>
<span class="sd"> Otherwise, equal values are shown as NaNs.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> DataFrame</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Matching NaNs will not appear as a difference.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.pandas.config import set_option, reset_option</span>
<span class="sd"> &gt;&gt;&gt; set_option(&quot;compute.ops_on_diff_frames&quot;, True)</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;, &quot;d&quot;, &quot;e&quot;])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([&quot;a&quot;, &quot;a&quot;, &quot;c&quot;, &quot;b&quot;, &quot;e&quot;])</span>
<span class="sd"> Align the differences on columns</span>
<span class="sd"> &gt;&gt;&gt; s1.compare(s2).sort_index()</span>
<span class="sd"> self other</span>
<span class="sd"> 1 b a</span>
<span class="sd"> 3 d b</span>
<span class="sd"> Keep all original rows</span>
<span class="sd"> &gt;&gt;&gt; s1.compare(s2, keep_shape=True).sort_index()</span>
<span class="sd"> self other</span>
<span class="sd"> 0 None None</span>
<span class="sd"> 1 b a</span>
<span class="sd"> 2 None None</span>
<span class="sd"> 3 d b</span>
<span class="sd"> 4 None None</span>
<span class="sd"> Keep all original rows and all original values</span>
<span class="sd"> &gt;&gt;&gt; s1.compare(s2, keep_shape=True, keep_equal=True).sort_index()</span>
<span class="sd"> self other</span>
<span class="sd"> 0 a a</span>
<span class="sd"> 1 b a</span>
<span class="sd"> 2 c c</span>
<span class="sd"> 3 d b</span>
<span class="sd"> 4 e e</span>
<span class="sd"> &gt;&gt;&gt; reset_option(&quot;compute.ops_on_diff_frames&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">combined</span><span class="p">:</span> <span class="n">DataFrame</span>
<span class="k">if</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="n">self_column_label</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">to_frame</span><span class="p">(),</span> <span class="s2">&quot;__self_column__&quot;</span><span class="p">)</span>
<span class="n">other_column_label</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">(),</span> <span class="s2">&quot;__other_column__&quot;</span><span class="p">)</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_columns</span><span class="p">(</span>
<span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">self_column_label</span><span class="p">),</span> <span class="n">other</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">other_column_label</span><span class="p">)]</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">get_option</span><span class="p">(</span><span class="s2">&quot;compute.eager_check&quot;</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">index</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Can only compare identically-labeled Series objects&quot;</span><span class="p">)</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">combine_frames</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">(),</span> <span class="n">other</span><span class="o">.</span><span class="n">to_frame</span><span class="p">())</span>
<span class="n">this_column_label</span> <span class="o">=</span> <span class="s2">&quot;self&quot;</span>
<span class="n">that_column_label</span> <span class="o">=</span> <span class="s2">&quot;other&quot;</span>
<span class="k">if</span> <span class="n">keep_equal</span> <span class="ow">and</span> <span class="n">keep_shape</span><span class="p">:</span>
<span class="n">combined</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Index</span><span class="p">([</span><span class="n">this_column_label</span><span class="p">,</span> <span class="n">that_column_label</span><span class="p">])</span>
<span class="k">return</span> <span class="n">combined</span>
<span class="n">this_data_scol</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">that_data_scol</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_columns</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="n">index_scols</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span>
<span class="k">if</span> <span class="n">keep_shape</span><span class="p">:</span>
<span class="n">this_scol</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">this_data_scol</span> <span class="o">==</span> <span class="n">that_data_scol</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">this_data_scol</span><span class="p">)</span>
<span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">this_column_label</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">this_field</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">this_column_label</span><span class="p">,</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">True</span>
<span class="p">)</span>
<span class="n">that_scol</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">this_data_scol</span> <span class="o">==</span> <span class="n">that_data_scol</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">that_data_scol</span><span class="p">)</span>
<span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">that_column_label</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">that_field</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">that_column_label</span><span class="p">,</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">True</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="o">~</span><span class="n">this_data_scol</span><span class="o">.</span><span class="n">eqNullSafe</span><span class="p">(</span><span class="n">that_data_scol</span><span class="p">))</span>
<span class="n">this_scol</span> <span class="o">=</span> <span class="n">this_data_scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">this_column_label</span><span class="p">)</span>
<span class="n">this_field</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">this_column_label</span><span class="p">)</span>
<span class="n">that_scol</span> <span class="o">=</span> <span class="n">that_data_scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">that_column_label</span><span class="p">)</span>
<span class="n">that_field</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">that_column_label</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">index_scols</span><span class="p">,</span> <span class="n">this_scol</span><span class="p">,</span> <span class="n">that_scol</span><span class="p">,</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span>
<span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_column_names</span>
<span class="p">],</span>
<span class="n">index_names</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_names</span><span class="p">,</span>
<span class="n">index_fields</span><span class="o">=</span><span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">,</span>
<span class="n">column_labels</span><span class="o">=</span><span class="p">[(</span><span class="n">this_column_label</span><span class="p">,),</span> <span class="p">(</span><span class="n">that_column_label</span><span class="p">,)],</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">this_column_label</span><span class="p">),</span> <span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">that_column_label</span><span class="p">)],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="n">this_field</span><span class="p">,</span> <span class="n">that_field</span><span class="p">],</span>
<span class="n">column_label_names</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">)</span></div>
<span class="c1"># TODO(SPARK-40553): 1, support array-like &#39;value&#39;; 2, add parameter &#39;sorter&#39;</span>
<div class="viewcode-block" id="Series.searchsorted"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.searchsorted.html#pyspark.pandas.Series.searchsorted">[docs]</a> <span class="k">def</span> <span class="nf">searchsorted</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="n">side</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;left&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Find indices where elements should be inserted to maintain order.</span>
<span class="sd"> Find the indices into a sorted Series self such that, if the corresponding elements</span>
<span class="sd"> in value were inserted before the indices, the order of self would be preserved.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> value : scalar</span>
<span class="sd"> Values to insert into self.</span>
<span class="sd"> side : {&#39;left&#39;, &#39;right&#39;}, default &#39;left&#39;</span>
<span class="sd"> If &#39;left&#39;, the index of the first suitable location found is given.</span>
<span class="sd"> If &#39;right&#39;, return the last such index. If there is no suitable index,</span>
<span class="sd"> return either 0 or N (where N is the length of self).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> int</span>
<span class="sd"> insertion point</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> The Series must be monotonically sorted, otherwise wrong locations will likely be returned.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; ser = ps.Series([1, 2, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; ser.searchsorted(0)</span>
<span class="sd"> 0</span>
<span class="sd"> &gt;&gt;&gt; ser.searchsorted(1)</span>
<span class="sd"> 0</span>
<span class="sd"> &gt;&gt;&gt; ser.searchsorted(2)</span>
<span class="sd"> 1</span>
<span class="sd"> &gt;&gt;&gt; ser.searchsorted(5)</span>
<span class="sd"> 4</span>
<span class="sd"> &gt;&gt;&gt; ser.searchsorted(0, side=&quot;right&quot;)</span>
<span class="sd"> 0</span>
<span class="sd"> &gt;&gt;&gt; ser.searchsorted(1, side=&quot;right&quot;)</span>
<span class="sd"> 1</span>
<span class="sd"> &gt;&gt;&gt; ser.searchsorted(2, side=&quot;right&quot;)</span>
<span class="sd"> 3</span>
<span class="sd"> &gt;&gt;&gt; ser.searchsorted(5, side=&quot;right&quot;)</span>
<span class="sd"> 4</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">side</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;left&quot;</span><span class="p">,</span> <span class="s2">&quot;right&quot;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Invalid side </span><span class="si">{</span><span class="n">side</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span>
<span class="n">index_col_name</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="s2">&quot;__search_sorted_index_col__&quot;</span><span class="p">)</span>
<span class="n">value_col_name</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="s2">&quot;__search_sorted_value_col__&quot;</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="o">.</span><span class="n">attach_distributed_sequence_column</span><span class="p">(</span>
<span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">value_col_name</span><span class="p">)),</span> <span class="n">index_col_name</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">side</span> <span class="o">==</span> <span class="s2">&quot;left&quot;</span><span class="p">:</span>
<span class="n">results</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="o">&lt;=</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">value_col_name</span><span class="p">),</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">index_col_name</span><span class="p">))),</span>
<span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">0</span><span class="p">)),</span>
<span class="p">)</span><span class="o">.</span><span class="n">take</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">results</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="o">&lt;</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">value_col_name</span><span class="p">),</span> <span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">index_col_name</span><span class="p">))),</span>
<span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="mi">0</span><span class="p">)),</span>
<span class="p">)</span><span class="o">.</span><span class="n">take</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">results</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">return</span> <span class="mi">0</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">]</span> <span class="k">if</span> <span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span></div>
<div class="viewcode-block" id="Series.align"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.align.html#pyspark.pandas.Series.align">[docs]</a> <span class="k">def</span> <span class="nf">align</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">DataFrame</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">],</span>
<span class="n">join</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;outer&quot;</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">copy</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="n">DataFrame</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Align two objects on their axes with the specified join method.</span>
<span class="sd"> Join method is specified for each axis Index.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : DataFrame or Series</span>
<span class="sd"> join : {&#39;outer&#39;, &#39;inner&#39;, &#39;left&#39;, &#39;right&#39;}, default &#39;outer&#39;</span>
<span class="sd"> axis : allowed axis of the other object, default None</span>
<span class="sd"> Align on index (0), columns (1), or both (None).</span>
<span class="sd"> copy : bool, default True</span>
<span class="sd"> Always returns new objects. If copy=False and no reindexing is</span>
<span class="sd"> required then original objects are returned.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> (left, right) : (Series, type of other)</span>
<span class="sd"> Aligned objects.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; ps.set_option(&quot;compute.ops_on_diff_frames&quot;, True)</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([7, 8, 9], index=[10, 11, 12])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([&quot;g&quot;, &quot;h&quot;, &quot;i&quot;], index=[10, 20, 30])</span>
<span class="sd"> &gt;&gt;&gt; aligned_l, aligned_r = s1.align(s2)</span>
<span class="sd"> &gt;&gt;&gt; aligned_l.sort_index()</span>
<span class="sd"> 10 7.0</span>
<span class="sd"> 11 8.0</span>
<span class="sd"> 12 9.0</span>
<span class="sd"> 20 NaN</span>
<span class="sd"> 30 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; aligned_r.sort_index()</span>
<span class="sd"> 10 g</span>
<span class="sd"> 11 None</span>
<span class="sd"> 12 None</span>
<span class="sd"> 20 h</span>
<span class="sd"> 30 i</span>
<span class="sd"> dtype: object</span>
<span class="sd"> Align with the join type &quot;inner&quot;:</span>
<span class="sd"> &gt;&gt;&gt; aligned_l, aligned_r = s1.align(s2, join=&quot;inner&quot;)</span>
<span class="sd"> &gt;&gt;&gt; aligned_l.sort_index()</span>
<span class="sd"> 10 7</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; aligned_r.sort_index()</span>
<span class="sd"> 10 g</span>
<span class="sd"> dtype: object</span>
<span class="sd"> Align with a DataFrame:</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;a&quot;: [1, 2, 3], &quot;b&quot;: [&quot;a&quot;, &quot;b&quot;, &quot;c&quot;]}, index=[10, 20, 30])</span>
<span class="sd"> &gt;&gt;&gt; aligned_l, aligned_r = s1.align(df)</span>
<span class="sd"> &gt;&gt;&gt; aligned_l.sort_index()</span>
<span class="sd"> 10 7.0</span>
<span class="sd"> 11 8.0</span>
<span class="sd"> 12 9.0</span>
<span class="sd"> 20 NaN</span>
<span class="sd"> 30 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; aligned_r.sort_index()</span>
<span class="sd"> a b</span>
<span class="sd"> 10 1.0 a</span>
<span class="sd"> 11 NaN None</span>
<span class="sd"> 12 NaN None</span>
<span class="sd"> 20 2.0 b</span>
<span class="sd"> 30 3.0 c</span>
<span class="sd"> &gt;&gt;&gt; ps.reset_option(&quot;compute.ops_on_diff_frames&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span>
<span class="k">if</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Series does not support columns axis.&quot;</span><span class="p">)</span>
<span class="n">self_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span>
<span class="n">left</span><span class="p">,</span> <span class="n">right</span> <span class="o">=</span> <span class="n">self_df</span><span class="o">.</span><span class="n">align</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">join</span><span class="o">=</span><span class="n">join</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">if</span> <span class="n">left</span> <span class="ow">is</span> <span class="n">self_df</span><span class="p">:</span>
<span class="n">left_ser</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">left_ser</span> <span class="o">=</span> <span class="n">first_series</span><span class="p">(</span><span class="n">left</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">return</span> <span class="p">(</span><span class="n">left_ser</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">right</span><span class="o">.</span><span class="n">copy</span><span class="p">())</span> <span class="k">if</span> <span class="n">copy</span> <span class="k">else</span> <span class="p">(</span><span class="n">left_ser</span><span class="p">,</span> <span class="n">right</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.between_time"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.between_time.html#pyspark.pandas.Series.between_time">[docs]</a> <span class="k">def</span> <span class="nf">between_time</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">start_time</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">datetime</span><span class="o">.</span><span class="n">time</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span>
<span class="n">end_time</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">datetime</span><span class="o">.</span><span class="n">time</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span>
<span class="n">inclusive</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;both&quot;</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Select values between particular times of the day (example: 9:00-9:30 AM).</span>
<span class="sd"> By setting ``start_time`` to be later than ``end_time``,</span>
<span class="sd"> you can get the times that are *not* between the two times.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> start_time : datetime.time or str</span>
<span class="sd"> Initial time as a time filter limit.</span>
<span class="sd"> end_time : datetime.time or str</span>
<span class="sd"> End time as a time filter limit.</span>
<span class="sd"> inclusive : {&quot;both&quot;, &quot;neither&quot;, &quot;left&quot;, &quot;right&quot;}, default &quot;both&quot;</span>
<span class="sd"> Include boundaries; whether to set each bound as closed or open.</span>
<span class="sd"> .. versionadded:: 4.0.0</span>
<span class="sd"> axis : {0 or &#39;index&#39;, 1 or &#39;columns&#39;}, default 0</span>
<span class="sd"> Determine range time on index or columns value.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Data from the original object filtered to the specified time range.</span>
<span class="sd"> Raises</span>
<span class="sd"> ------</span>
<span class="sd"> TypeError</span>
<span class="sd"> If the index is not a :class:`DatetimeIndex`</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> at_time : Select values at a particular time of the day.</span>
<span class="sd"> last : Select final periods of time series based on a date offset.</span>
<span class="sd"> DatetimeIndex.indexer_between_time : Get just the index locations for</span>
<span class="sd"> values between particular times of the day.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; idx = pd.date_range(&#39;2018-04-09&#39;, periods=4, freq=&#39;1D20min&#39;)</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([1, 2, 3, 4], index=idx)</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 2018-04-09 00:00:00 1</span>
<span class="sd"> 2018-04-10 00:20:00 2</span>
<span class="sd"> 2018-04-11 00:40:00 3</span>
<span class="sd"> 2018-04-12 01:00:00 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; psser.between_time(&#39;0:15&#39;, &#39;0:45&#39;) # doctest: +SKIP</span>
<span class="sd"> 2018-04-10 00:20:00 2</span>
<span class="sd"> 2018-04-11 00:40:00 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">between_time</span><span class="p">(</span><span class="n">start_time</span><span class="p">,</span> <span class="n">end_time</span><span class="p">,</span> <span class="n">inclusive</span><span class="p">,</span> <span class="n">axis</span><span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.at_time"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.at_time.html#pyspark.pandas.Series.at_time">[docs]</a> <span class="k">def</span> <span class="nf">at_time</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">time</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">datetime</span><span class="o">.</span><span class="n">time</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span> <span class="n">asof</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Select values at a particular time of day (example: 9:30 AM).</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> time : datetime.time or str</span>
<span class="sd"> axis : {0 or &#39;index&#39;, 1 or &#39;columns&#39;}, default 0</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Raises</span>
<span class="sd"> ------</span>
<span class="sd"> TypeError</span>
<span class="sd"> If the index is not a :class:`DatetimeIndex`</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> between_time : Select values between particular times of the day.</span>
<span class="sd"> DatetimeIndex.indexer_at_time : Get just the index locations for</span>
<span class="sd"> values at particular time of the day.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; idx = pd.date_range(&#39;2018-04-09&#39;, periods=4, freq=&#39;12H&#39;)</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([1, 2, 3, 4], index=idx)</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 2018-04-09 00:00:00 1</span>
<span class="sd"> 2018-04-09 12:00:00 2</span>
<span class="sd"> 2018-04-10 00:00:00 3</span>
<span class="sd"> 2018-04-10 12:00:00 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; psser.at_time(&#39;12:00&#39;)</span>
<span class="sd"> 2018-04-09 12:00:00 2</span>
<span class="sd"> 2018-04-10 12:00:00 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">at_time</span><span class="p">(</span><span class="n">time</span><span class="p">,</span> <span class="n">asof</span><span class="p">,</span> <span class="n">axis</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_cum</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">PySparkColumn</span><span class="p">],</span> <span class="n">PySparkColumn</span><span class="p">],</span>
<span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span><span class="p">,</span>
<span class="n">part_cols</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">(),</span>
<span class="n">ascending</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="c1"># This is used by cummin, cummax, cumsum, etc.</span>
<span class="k">if</span> <span class="n">ascending</span><span class="p">:</span>
<span class="n">window</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">asc</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">))</span>
<span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">window</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">desc</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">))</span>
<span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">skipna</span><span class="p">:</span>
<span class="c1"># There is a behavior difference between pandas and PySpark. In case of cummax,</span>
<span class="c1">#</span>
<span class="c1"># Input:</span>
<span class="c1"># A B</span>
<span class="c1"># 0 2.0 1.0</span>
<span class="c1"># 1 5.0 NaN</span>
<span class="c1"># 2 1.0 0.0</span>
<span class="c1"># 3 2.0 4.0</span>
<span class="c1"># 4 4.0 9.0</span>
<span class="c1">#</span>
<span class="c1"># pandas:</span>
<span class="c1"># A B</span>
<span class="c1"># 0 2.0 1.0</span>
<span class="c1"># 1 5.0 NaN</span>
<span class="c1"># 2 5.0 1.0</span>
<span class="c1"># 3 5.0 4.0</span>
<span class="c1"># 4 5.0 9.0</span>
<span class="c1">#</span>
<span class="c1"># PySpark:</span>
<span class="c1"># A B</span>
<span class="c1"># 0 2.0 1.0</span>
<span class="c1"># 1 5.0 1.0</span>
<span class="c1"># 2 5.0 1.0</span>
<span class="c1"># 3 5.0 4.0</span>
<span class="c1"># 4 5.0 9.0</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="c1"># Manually sets nulls where the original value is null.</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span>
<span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Here, we use two Windows.</span>
<span class="c1"># One for real data.</span>
<span class="c1"># The other one for setting nulls after the first null it meets.</span>
<span class="c1">#</span>
<span class="c1"># There is a behavior difference between pandas and PySpark. In case of cummax,</span>
<span class="c1">#</span>
<span class="c1"># Input:</span>
<span class="c1"># A B</span>
<span class="c1"># 0 2.0 1.0</span>
<span class="c1"># 1 5.0 NaN</span>
<span class="c1"># 2 1.0 0.0</span>
<span class="c1"># 3 2.0 4.0</span>
<span class="c1"># 4 4.0 9.0</span>
<span class="c1">#</span>
<span class="c1"># pandas:</span>
<span class="c1"># A B</span>
<span class="c1"># 0 2.0 1.0</span>
<span class="c1"># 1 5.0 NaN</span>
<span class="c1"># 2 5.0 NaN</span>
<span class="c1"># 3 5.0 NaN</span>
<span class="c1"># 4 5.0 NaN</span>
<span class="c1">#</span>
<span class="c1"># PySpark:</span>
<span class="c1"># A B</span>
<span class="c1"># 0 2.0 1.0</span>
<span class="c1"># 1 5.0 1.0</span>
<span class="c1"># 2 5.0 1.0</span>
<span class="c1"># 3 5.0 4.0</span>
<span class="c1"># 4 5.0 9.0</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="c1"># The running max over isNull() turns True at the first null and stays True afterwards.</span>
<span class="n">F</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">())</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">),</span>
<span class="c1"># Manually sets nulls given the column defined above.</span>
<span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_cumsum</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span><span class="p">,</span> <span class="n">part_cols</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">())</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="n">psser</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">):</span>
<span class="n">psser</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="k">lambda</span> <span class="n">scol</span><span class="p">:</span> <span class="n">scol</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">LongType</span><span class="p">()))</span>
<span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">),</span>
<span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">(),</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">psser</span><span class="o">.</span><span class="n">_cum</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">sum</span><span class="p">,</span> <span class="n">skipna</span><span class="p">,</span> <span class="n">part_cols</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">_cumprod</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span><span class="p">,</span> <span class="n">part_cols</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">())</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="n">psser</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">):</span>
<span class="n">psser</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="k">lambda</span> <span class="n">scol</span><span class="p">:</span> <span class="n">scol</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">LongType</span><span class="p">()))</span>
<span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">),</span>
<span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">(),</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">psser</span><span class="o">.</span><span class="n">_cum</span><span class="p">(</span><span class="k">lambda</span> <span class="n">c</span><span class="p">:</span> <span class="n">SF</span><span class="o">.</span><span class="n">product</span><span class="p">(</span><span class="n">c</span><span class="p">,</span> <span class="n">skipna</span><span class="p">),</span> <span class="n">skipna</span><span class="p">,</span> <span class="n">part_cols</span><span class="p">)</span>
<span class="c1"># ----------------------------------------------------------------------</span>
<span class="c1"># Accessor Methods</span>
<span class="c1"># ----------------------------------------------------------------------</span>
<span class="n">dt</span> <span class="o">=</span> <span class="n">CachedAccessor</span><span class="p">(</span><span class="s2">&quot;dt&quot;</span><span class="p">,</span> <span class="n">DatetimeMethods</span><span class="p">)</span>
<span class="nb">str</span> <span class="o">=</span> <span class="n">CachedAccessor</span><span class="p">(</span><span class="s2">&quot;str&quot;</span><span class="p">,</span> <span class="n">StringMethods</span><span class="p">)</span>
<span class="n">cat</span> <span class="o">=</span> <span class="n">CachedAccessor</span><span class="p">(</span><span class="s2">&quot;cat&quot;</span><span class="p">,</span> <span class="n">CategoricalAccessor</span><span class="p">)</span>
<span class="n">plot</span> <span class="o">=</span> <span class="n">CachedAccessor</span><span class="p">(</span><span class="s2">&quot;plot&quot;</span><span class="p">,</span> <span class="n">PandasOnSparkPlotAccessor</span><span class="p">)</span>
<span class="c1"># ----------------------------------------------------------------------</span>
<span class="k">def</span> <span class="nf">_apply_series_op</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">op</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="s2">&quot;Series&quot;</span><span class="p">],</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">PySparkColumn</span><span class="p">]],</span> <span class="n">should_resolve</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="n">psser_or_scol</span> <span class="o">=</span> <span class="n">op</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psser_or_scol</span><span class="p">,</span> <span class="n">Series</span><span class="p">):</span>
<span class="n">psser</span> <span class="o">=</span> <span class="n">psser_or_scol</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">psser</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">psser_or_scol</span><span class="p">)</span>
<span class="k">if</span> <span class="n">should_resolve</span><span class="p">:</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">psser</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">_reduce_for_stat_function</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">sfun</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="s2">&quot;Series&quot;</span><span class="p">],</span> <span class="n">PySparkColumn</span><span class="p">],</span>
<span class="n">name</span><span class="p">:</span> <span class="n">str_type</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Scalar</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Applies sfun to the column and returns a scalar</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> sfun : the stats function to be used for aggregation</span>
<span class="sd"> name : original pandas API name.</span>
<span class="sd"> axis : used only for a sanity check because Series only supports the index axis.</span>
<span class="sd"> numeric_only : not used by this implementation, but passed down by stats functions.</span>
<span class="sd"> skipna : exclude NA/null values when computing the result.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span>
<span class="k">if</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Series does not support columns axis.&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">skipna</span> <span class="ow">and</span> <span class="n">get_option</span><span class="p">(</span><span class="s2">&quot;compute.eager_check&quot;</span><span class="p">)</span> <span class="ow">and</span> <span class="bp">self</span><span class="o">.</span><span class="n">hasnans</span><span class="p">:</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">first</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">sfun</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="n">min_count</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;min_count&quot;</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
<span class="k">if</span> <span class="n">min_count</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">Frame</span><span class="o">.</span><span class="n">_count_expr</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="n">min_count</span><span class="p">,</span> <span class="n">scol</span><span class="p">)</span>
<span class="n">result</span> <span class="o">=</span> <span class="n">unpack_scalar</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">scol</span><span class="p">))</span>
<span class="k">return</span> <span class="n">result</span> <span class="k">if</span> <span class="n">result</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="c1"># Override the `groupby` to specify the actual return type annotation.</span>
<div class="viewcode-block" id="Series.groupby"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.groupby.html#pyspark.pandas.Series.groupby">[docs]</a> <span class="k">def</span> <span class="nf">groupby</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">by</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]]],</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="n">as_index</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">dropna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;SeriesGroupBy&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span>
<span class="s2">&quot;SeriesGroupBy&quot;</span><span class="p">,</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="n">by</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">as_index</span><span class="o">=</span><span class="n">as_index</span><span class="p">,</span> <span class="n">dropna</span><span class="o">=</span><span class="n">dropna</span><span class="p">)</span>
<span class="p">)</span></div>
<span class="n">groupby</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">Frame</span><span class="o">.</span><span class="n">groupby</span><span class="o">.</span><span class="vm">__doc__</span>
<span class="k">def</span> <span class="nf">_build_groupby</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">by</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">Label</span><span class="p">]],</span> <span class="n">as_index</span><span class="p">:</span> <span class="nb">bool</span><span class="p">,</span> <span class="n">dropna</span><span class="p">:</span> <span class="nb">bool</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;SeriesGroupBy&quot;</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.groupby</span> <span class="kn">import</span> <span class="n">SeriesGroupBy</span>
<span class="k">return</span> <span class="n">SeriesGroupBy</span><span class="o">.</span><span class="n">_build</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">by</span><span class="p">,</span> <span class="n">as_index</span><span class="o">=</span><span class="n">as_index</span><span class="p">,</span> <span class="n">dropna</span><span class="o">=</span><span class="n">dropna</span><span class="p">)</span>
<div class="viewcode-block" id="Series.resample"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.resample.html#pyspark.pandas.Series.resample">[docs]</a> <span class="k">def</span> <span class="nf">resample</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">rule</span><span class="p">:</span> <span class="n">str_type</span><span class="p">,</span>
<span class="n">closed</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">str_type</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">label</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">str_type</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">on</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;SeriesResampler&quot;</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Resample time-series data.</span>
<span class="sd"> Convenience method for frequency conversion and resampling of time series.</span>
<span class="sd"> The object must have a datetime-like index (only `DatetimeIndex` is supported for now),</span>
<span class="sd"> or the caller must pass a datetime-like Series</span>
<span class="sd"> to the ``on`` keyword parameter.</span>
<span class="sd"> .. versionadded:: 3.4.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> rule : str</span>
<span class="sd"> The offset string or object representing target conversion.</span>
<span class="sd"> Currently, supported units are {&#39;YE&#39;, &#39;A&#39;, &#39;ME&#39;, &#39;D&#39;, &#39;h&#39;,</span>
<span class="sd"> &#39;min&#39;, &#39;MIN&#39;, &#39;s&#39;}.</span>
<span class="sd"> closed : {&#39;right&#39;, &#39;left&#39;}, default None</span>
<span class="sd"> Which side of bin interval is closed. The default is &#39;left&#39;</span>
<span class="sd"> for all frequency offsets except for &#39;A&#39;, &#39;YE&#39; and &#39;ME&#39; which all</span>
<span class="sd"> have a default of &#39;right&#39;.</span>
<span class="sd"> label : {&#39;right&#39;, &#39;left&#39;}, default None</span>
<span class="sd"> Which bin edge label to label bucket with. The default is &#39;left&#39;</span>
<span class="sd"> for all frequency offsets except for &#39;A&#39;, &#39;YE&#39; and &#39;ME&#39; which all</span>
<span class="sd"> have a default of &#39;right&#39;.</span>
<span class="sd"> on : Series, optional</span>
<span class="sd"> Series to use instead of the index for resampling.</span>
<span class="sd"> It must be datetime-like (currently only TimestampType is supported).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> SeriesResampler</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Start by creating a series with 9 one minute timestamps.</span>
<span class="sd"> &gt;&gt;&gt; index = pd.date_range(&#39;1/1/2000&#39;, periods=9, freq=&#39;T&#39;)</span>
<span class="sd"> &gt;&gt;&gt; series = ps.Series(range(9), index=index, name=&#39;V&#39;)</span>
<span class="sd"> &gt;&gt;&gt; series</span>
<span class="sd"> 2000-01-01 00:00:00 0</span>
<span class="sd"> 2000-01-01 00:01:00 1</span>
<span class="sd"> 2000-01-01 00:02:00 2</span>
<span class="sd"> 2000-01-01 00:03:00 3</span>
<span class="sd"> 2000-01-01 00:04:00 4</span>
<span class="sd"> 2000-01-01 00:05:00 5</span>
<span class="sd"> 2000-01-01 00:06:00 6</span>
<span class="sd"> 2000-01-01 00:07:00 7</span>
<span class="sd"> 2000-01-01 00:08:00 8</span>
<span class="sd"> Name: V, dtype: int64</span>
<span class="sd"> Downsample the series into 3 minute bins and sum the values</span>
<span class="sd"> of the timestamps falling into a bin.</span>
<span class="sd"> &gt;&gt;&gt; series.resample(&#39;3T&#39;).sum().sort_index()</span>
<span class="sd"> 2000-01-01 00:00:00 3.0</span>
<span class="sd"> 2000-01-01 00:03:00 12.0</span>
<span class="sd"> 2000-01-01 00:06:00 21.0</span>
<span class="sd"> Name: V, dtype: float64</span>
<span class="sd"> Downsample the series into 3 minute bins as above, but label each</span>
<span class="sd"> bin using the right edge instead of the left. Please note that the</span>
<span class="sd"> value in the bucket used as the label is not included in the bucket,</span>
<span class="sd"> which it labels. For example, in the original series the</span>
<span class="sd"> bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed</span>
<span class="sd"> value in the resampled bucket with the label ``2000-01-01 00:03:00``</span>
<span class="sd"> does not include 3 (if it did, the summed value would be 6, not 3).</span>
<span class="sd"> To include this value, close the right side of the bin interval as</span>
<span class="sd"> illustrated in the example below this one.</span>
<span class="sd"> &gt;&gt;&gt; series.resample(&#39;3T&#39;, label=&#39;right&#39;).sum().sort_index()</span>
<span class="sd"> 2000-01-01 00:03:00 3.0</span>
<span class="sd"> 2000-01-01 00:06:00 12.0</span>
<span class="sd"> 2000-01-01 00:09:00 21.0</span>
<span class="sd"> Name: V, dtype: float64</span>
<span class="sd"> Downsample the series into 3 minute bins as above, but close the right</span>
<span class="sd"> side of the bin interval.</span>
<span class="sd"> &gt;&gt;&gt; series.resample(&#39;3T&#39;, label=&#39;right&#39;, closed=&#39;right&#39;).sum().sort_index()</span>
<span class="sd"> 2000-01-01 00:00:00 0.0</span>
<span class="sd"> 2000-01-01 00:03:00 6.0</span>
<span class="sd"> 2000-01-01 00:06:00 15.0</span>
<span class="sd"> 2000-01-01 00:09:00 15.0</span>
<span class="sd"> Name: V, dtype: float64</span>
<span class="sd"> Upsample the series into 30 second bins.</span>
<span class="sd"> &gt;&gt;&gt; series.resample(&#39;30S&#39;).sum().sort_index()[0:5] # Select first 5 rows</span>
<span class="sd"> 2000-01-01 00:00:00 0.0</span>
<span class="sd"> 2000-01-01 00:00:30 0.0</span>
<span class="sd"> 2000-01-01 00:01:00 1.0</span>
<span class="sd"> 2000-01-01 00:01:30 0.0</span>
<span class="sd"> 2000-01-01 00:02:00 2.0</span>
<span class="sd"> Name: V, dtype: float64</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> DataFrame.resample : Resample a DataFrame.</span>
<span class="sd"> groupby : Group by mapping, function, label, or list of labels.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.indexes</span> <span class="kn">import</span> <span class="n">DatetimeIndex</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.resample</span> <span class="kn">import</span> <span class="n">SeriesResampler</span>
<span class="k">if</span> <span class="n">on</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">,</span> <span class="n">DatetimeIndex</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;resample currently works only for DatetimeIndex&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">on</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">as_spark_type</span><span class="p">(</span><span class="n">on</span><span class="o">.</span><span class="n">dtype</span><span class="p">),</span> <span class="n">TimestampType</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;`on` currently works only for TimestampType&quot;</span><span class="p">)</span>
<span class="n">agg_columns</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="n">column_label</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_type_for</span><span class="p">(</span><span class="n">column_label</span><span class="p">),</span> <span class="p">(</span><span class="n">NumericType</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">)):</span>
<span class="n">agg_columns</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">agg_columns</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;No available aggregation columns!&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">SeriesResampler</span><span class="p">(</span>
<span class="n">psser</span><span class="o">=</span><span class="bp">self</span><span class="p">,</span>
<span class="n">resamplekey</span><span class="o">=</span><span class="n">on</span><span class="p">,</span>
<span class="n">rule</span><span class="o">=</span><span class="n">rule</span><span class="p">,</span>
<span class="n">closed</span><span class="o">=</span><span class="n">closed</span><span class="p">,</span>
<span class="n">label</span><span class="o">=</span><span class="n">label</span><span class="p">,</span>
<span class="n">agg_columns</span><span class="o">=</span><span class="n">agg_columns</span><span class="p">,</span>
<span class="p">)</span></div>
<span class="k">def</span> <span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">type</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="o">==</span> <span class="nb">int</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="p">(</span><span class="n">IntegerType</span><span class="p">,</span> <span class="n">LongType</span><span class="p">)):</span>
<span class="n">warnings</span><span class="o">.</span><span class="n">warn</span><span class="p">(</span>
<span class="s2">&quot;Series.__getitem__ treating keys as positions is deprecated. &quot;</span>
<span class="s2">&quot;In a future version, integer keys will always be treated as labels &quot;</span>
<span class="s2">&quot;(consistent with DataFrame behavior). &quot;</span>
<span class="s2">&quot;To access a value by position, use `ser.iloc[pos]`&quot;</span><span class="p">,</span>
<span class="ne">FutureWarning</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">if</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="nb">slice</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">any</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">n</span><span class="p">)</span> <span class="o">==</span> <span class="nb">int</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="p">[</span><span class="n">key</span><span class="o">.</span><span class="n">start</span><span class="p">,</span> <span class="n">key</span><span class="o">.</span><span class="n">stop</span><span class="p">]))</span> <span class="ow">or</span> <span class="p">(</span>
<span class="nb">type</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="o">==</span> <span class="nb">int</span>
<span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="p">(</span><span class="n">IntegerType</span><span class="p">,</span> <span class="n">LongType</span><span class="p">))</span>
<span class="p">):</span>
<span class="c1"># Seems like pandas Series always uses int as positional search when slicing</span>
<span class="c1"># with ints, searches based on index values when the value is int.</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
<span class="k">except</span> <span class="n">SparkPandasIndexingError</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span>
<span class="s2">&quot;Key length (</span><span class="si">{}</span><span class="s2">) exceeds index depth (</span><span class="si">{}</span><span class="s2">)&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="nb">len</span><span class="p">(</span><span class="n">key</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_level</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="n">str_type</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="n">item</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">&quot;__&quot;</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeSeries</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeSeries</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="s2">&quot;&#39;Series&#39; object has no attribute &#39;</span><span class="si">{}</span><span class="s2">&#39;&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">item</span><span class="p">))</span>
<span class="k">def</span> <span class="nf">_to_internal_pandas</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a pandas Series directly from _internal to avoid overhead of copy.</span>
<span class="sd"> This method is for internal use only.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">to_pandas_frame</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">str_type</span><span class="p">:</span>
<span class="n">max_display_count</span> <span class="o">=</span> <span class="n">get_option</span><span class="p">(</span><span class="s2">&quot;display.max_rows&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">max_display_count</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">to_string</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="nb">bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">pser</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_get_or_create_repr_pandas_cache</span><span class="p">(</span><span class="n">max_display_count</span><span class="p">)[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]</span>
<span class="n">pser_length</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">pser</span><span class="p">)</span>
<span class="n">pser</span> <span class="o">=</span> <span class="n">pser</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:</span><span class="n">max_display_count</span><span class="p">]</span>
<span class="k">if</span> <span class="n">pser_length</span> <span class="o">&gt;</span> <span class="n">max_display_count</span><span class="p">:</span>
<span class="n">repr_string</span> <span class="o">=</span> <span class="n">pser</span><span class="o">.</span><span class="n">to_string</span><span class="p">(</span><span class="n">length</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">rest</span><span class="p">,</span> <span class="n">prev_footer</span> <span class="o">=</span> <span class="n">repr_string</span><span class="o">.</span><span class="n">rsplit</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">match</span> <span class="o">=</span> <span class="n">REPR_PATTERN</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="n">prev_footer</span><span class="p">)</span>
<span class="k">if</span> <span class="n">match</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">length</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="s2">&quot;length&quot;</span><span class="p">)</span>
<span class="n">dtype_name</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">footer</span> <span class="o">=</span> <span class="s2">&quot;</span><span class="se">\n</span><span class="s2">dtype: </span><span class="si">{dtype}</span><span class="se">\n</span><span class="s2">Showing only the first </span><span class="si">{length}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">length</span><span class="o">=</span><span class="n">length</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">pprint_thing</span><span class="p">(</span><span class="n">dtype_name</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">footer</span> <span class="o">=</span> <span class="p">(</span>
<span class="s2">&quot;</span><span class="se">\n</span><span class="s2">Name: </span><span class="si">{name}</span><span class="s2">, dtype: </span><span class="si">{dtype}</span><span class="s2">&quot;</span>
<span class="s2">&quot;</span><span class="se">\n</span><span class="s2">Showing only the first </span><span class="si">{length}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">length</span><span class="o">=</span><span class="n">length</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">pprint_thing</span><span class="p">(</span><span class="n">dtype_name</span><span class="p">)</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">rest</span> <span class="o">+</span> <span class="n">footer</span>
<span class="k">return</span> <span class="n">pser</span><span class="o">.</span><span class="n">to_string</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
<span class="k">def</span> <span class="fm">__dir__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">str_type</span><span class="p">]:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">StructType</span><span class="p">):</span>
<span class="n">fields</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">fields</span> <span class="o">=</span> <span class="p">[</span><span class="n">f</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="o">.</span><span class="n">fieldNames</span><span class="p">()</span> <span class="k">if</span> <span class="s2">&quot; &quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">f</span><span class="p">]</span>
<span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__dir__</span><span class="p">())</span> <span class="o">+</span> <span class="n">fields</span>
<span class="k">def</span> <span class="fm">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="n">MissingPandasLikeSeries</span><span class="o">.</span><span class="fm">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="c1"># In order to support the type hints such as Series[...]. See DataFrame.__class_getitem__.</span>
<span class="k">def</span> <span class="nf">__class_getitem__</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Type</span><span class="p">[</span><span class="n">SeriesType</span><span class="p">]:</span>
<span class="k">return</span> <span class="n">create_type_for_series_type</span><span class="p">(</span><span class="n">params</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">unpack_scalar</span><span class="p">(</span><span class="n">sdf</span><span class="p">:</span> <span class="n">SparkDataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Takes a dataframe that is supposed to contain a single row with a single scalar value,</span>
<span class="sd"> and returns this value.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">lst</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">toPandas</span><span class="p">()</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">lst</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">,</span> <span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">lst</span><span class="p">)</span>
<span class="n">row</span> <span class="o">=</span> <span class="n">lst</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">lst2</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">row</span><span class="p">)</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">lst2</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">,</span> <span class="p">(</span><span class="n">row</span><span class="p">,</span> <span class="n">lst2</span><span class="p">)</span>
<span class="k">return</span> <span class="n">lst2</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">first_series</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Series</span><span class="p">:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">first_series</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">:</span>
<span class="o">...</span>
<span class="k">def</span> <span class="nf">first_series</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Series</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Takes a DataFrame and returns the first column of the DataFrame as a Series</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="p">(</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">)),</span> <span class="nb">type</span><span class="p">(</span><span class="n">df</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">):</span>
<span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span>
<span class="k">def</span> <span class="nf">_test</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">doctest</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span>
<span class="kn">import</span> <span class="nn">pyspark.pandas.series</span>
<span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s2">&quot;SPARK_HOME&quot;</span><span class="p">])</span>
<span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span><span class="o">.</span><span class="n">series</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">globs</span><span class="p">[</span><span class="s2">&quot;ps&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span>
<span class="n">spark</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">&quot;local[4]&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">&quot;pyspark.pandas.series tests&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span>
<span class="p">)</span>
<span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span>
<span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span><span class="o">.</span><span class="n">series</span><span class="p">,</span>
<span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span>
<span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span> <span class="o">|</span> <span class="n">doctest</span><span class="o">.</span><span class="n">NORMALIZE_WHITESPACE</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span>
<span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span>
<span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="n">_test</span><span class="p">()</span>
</pre></div>
</article>
<footer class="bd-footer-article">
<div class="footer-article-items footer-article__inner">
<div class="footer-article-item"><!-- Previous / next buttons -->
<div class="prev-next-area">
</div></div>
</div>
</footer>
</div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="../../../_static/scripts/bootstrap.js?digest=e353d410970836974a52"></script>
<script src="../../../_static/scripts/pydata-sphinx-theme.js?digest=e353d410970836974a52"></script>
<!-- Site-wide page footer: copyright/license notice, Sphinx version, and theme version. -->
<footer class="bd-footer">
  <div class="bd-footer__inner bd-page-width">
    <div class="footer-items__start">
      <div class="footer-item">
        <p class="copyright">
          Copyright © 2024 The Apache Software Foundation, Licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
        </p>
      </div>
      <div class="footer-item">
        <p class="sphinx-version">
          Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 4.5.0.
        </p>
      </div>
    </div>
    <div class="footer-items__end">
      <div class="footer-item">
        <p class="theme-version">
          Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.13.3.
        </p>
      </div>
    </div>
  </div>
</footer>
</body>
</html>