<!-- blob: 4964081b5ecd70770c6ceab786e4afa8b47e0f05 [file] [log] [blame] -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="docsearch:language" content="en">
<title>pyspark.pandas.series &#8212; PySpark 3.3.0 documentation</title>

<!-- Theme, icon-font and syntax-highlighting stylesheets. -->
<link rel="stylesheet" href="../../../_static/css/index.73d71520a4ca3b99cfee5594769eaaae.css">
<link rel="stylesheet" href="../../../_static/vendor/fontawesome/5.13.0/css/all.min.css">
<link rel="preload" as="font" type="font/woff2" crossorigin
  href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-solid-900.woff2">
<link rel="preload" as="font" type="font/woff2" crossorigin
  href="../../../_static/vendor/fontawesome/5.13.0/webfonts/fa-brands-400.woff2">
<link rel="stylesheet" href="../../../_static/vendor/open-sans_all/1.44.1/index.css">
<link rel="stylesheet" href="../../../_static/vendor/lato_latin-ext/1.44.1/index.css">
<link rel="stylesheet" href="../../../_static/basic.css">
<link rel="stylesheet" href="../../../_static/pygments.css">
<link rel="stylesheet" href="../../../_static/css/pyspark.css">
<link rel="preload" as="script" href="../../../_static/js/index.3da636dd464baa7582d2.js">

<!-- Sphinx runtime scripts; kept blocking and in their original order because later ones depend on earlier ones. -->
<script id="documentation_options" data-url_root="../../../" src="../../../_static/documentation_options.js"></script>
<script src="../../../_static/jquery.js"></script>
<script src="../../../_static/underscore.js"></script>
<script src="../../../_static/doctools.js"></script>
<script src="../../../_static/language_data.js"></script>
<script src="../../../_static/copybutton.js"></script>
<script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>

<!-- MathJax v2 collects its configuration blocks when the loader starts up, so the
     configuration has to appear before the (async) loader script, not after it. -->
<script type="text/x-mathjax-config">MathJax.Hub.Config({"tex2jax": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true, "ignoreClass": "document", "processClass": "math|output_area"}})</script>
<script async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>

<link rel="search" title="Search" href="../../../search.html">
</head>
<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<!-- Top navigation bar: brand link, collapse toggle for small screens, and the top-level doc sections. -->
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main" aria-label="Site sections">
  <div class="container-xl">
    <!-- The image is the only content of the link, so its alt text names the link destination. -->
    <a class="navbar-brand" href="../../../index.html">
      <img class="logo" src="../../../_static/spark-logo-reverse.png" alt="PySpark documentation home">
    </a>
    <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation">
      <span class="navbar-toggler-icon"></span>
    </button>
    <div id="navbar-menu" class="col-lg-9 collapse navbar-collapse">
      <ul id="navbar-main-elements" class="navbar-nav mr-auto">
        <li class="nav-item">
          <a class="nav-link" href="../../../getting_started/index.html">Getting Started</a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="../../../user_guide/index.html">User Guide</a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="../../../reference/index.html">API Reference</a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="../../../development/index.html">Development</a>
        </li>
        <li class="nav-item">
          <a class="nav-link" href="../../../migration_guide/index.html">Migration Guide</a>
        </li>
      </ul>
      <!-- Empty secondary list left in place; it is empty on this page. -->
      <ul class="navbar-nav">
      </ul>
    </div>
  </div>
</nav>
<div class="container-xl">
<div class="row">
<div class="col-12 col-md-3 bd-sidebar"><form class="bd-search d-flex align-items-center" action="../../../search.html" method="get">
    <!-- Decorative magnifier icon; the input's aria-label carries the accessible name. -->
    <i class="icon fas fa-search" aria-hidden="true"></i>
    <input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off">
  </form>
  <nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
    <div class="bd-toc-item active">
      <ul class="nav bd-sidenav">
      </ul>
    </div><!-- closes .bd-toc-item, which was previously left open when </nav> was reached -->
  </nav>
</div>
<!-- On-page table of contents column; only shown at the xl breakpoint (d-none d-xl-block). -->
<div class="d-none d-xl-block col-xl-2 bd-toc">
  <!-- Labelled so this landmark can be told apart from the page's other <nav> elements. -->
  <nav id="bd-toc-nav" aria-label="On this page">
    <ul class="nav section-nav flex-column">
    </ul>
  </nav>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<h1>Source code for pyspark.pandas.series</h1><div class="highlight"><pre>
<span></span><span class="c1">#</span>
<span class="c1"># Licensed to the Apache Software Foundation (ASF) under one or more</span>
<span class="c1"># contributor license agreements. See the NOTICE file distributed with</span>
<span class="c1"># this work for additional information regarding copyright ownership.</span>
<span class="c1"># The ASF licenses this file to You under the Apache License, Version 2.0</span>
<span class="c1"># (the &quot;License&quot;); you may not use this file except in compliance with</span>
<span class="c1"># the License. You may obtain a copy of the License at</span>
<span class="c1">#</span>
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="c1">#</span>
<span class="c1"># Unless required by applicable law or agreed to in writing, software</span>
<span class="c1"># distributed under the License is distributed on an &quot;AS IS&quot; BASIS,</span>
<span class="c1"># WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span>
<span class="c1"># See the License for the specific language governing permissions and</span>
<span class="c1"># limitations under the License.</span>
<span class="c1">#</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd">A wrapper class for Spark Column to behave similar to pandas Series.</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="kn">import</span> <span class="nn">datetime</span>
<span class="kn">import</span> <span class="nn">re</span>
<span class="kn">import</span> <span class="nn">inspect</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">from</span> <span class="nn">collections.abc</span> <span class="kn">import</span> <span class="n">Mapping</span>
<span class="kn">from</span> <span class="nn">functools</span> <span class="kn">import</span> <span class="n">partial</span><span class="p">,</span> <span class="n">reduce</span>
<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">Any</span><span class="p">,</span>
<span class="n">Callable</span><span class="p">,</span>
<span class="n">Dict</span><span class="p">,</span>
<span class="n">Generic</span><span class="p">,</span>
<span class="n">IO</span><span class="p">,</span>
<span class="n">Iterable</span><span class="p">,</span>
<span class="n">List</span><span class="p">,</span>
<span class="n">Optional</span><span class="p">,</span>
<span class="n">Sequence</span><span class="p">,</span>
<span class="n">Tuple</span><span class="p">,</span>
<span class="n">Type</span><span class="p">,</span>
<span class="n">Union</span><span class="p">,</span>
<span class="n">cast</span><span class="p">,</span>
<span class="n">no_type_check</span><span class="p">,</span>
<span class="n">overload</span><span class="p">,</span>
<span class="n">TYPE_CHECKING</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">import</span> <span class="nn">numpy</span> <span class="k">as</span> <span class="nn">np</span>
<span class="kn">import</span> <span class="nn">pandas</span> <span class="k">as</span> <span class="nn">pd</span>
<span class="kn">from</span> <span class="nn">pandas.core.accessor</span> <span class="kn">import</span> <span class="n">CachedAccessor</span>
<span class="kn">from</span> <span class="nn">pandas.io.formats.printing</span> <span class="kn">import</span> <span class="n">pprint_thing</span>
<span class="kn">from</span> <span class="nn">pandas.api.types</span> <span class="kn">import</span> <span class="p">(</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="n">is_list_like</span><span class="p">,</span>
<span class="n">is_hashable</span><span class="p">,</span>
<span class="n">CategoricalDtype</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pandas.tseries.frequencies</span> <span class="kn">import</span> <span class="n">DateOffset</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">F</span><span class="p">,</span> <span class="n">Column</span><span class="p">,</span> <span class="n">DataFrame</span> <span class="k">as</span> <span class="n">SparkDataFrame</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.types</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">ArrayType</span><span class="p">,</span>
<span class="n">BooleanType</span><span class="p">,</span>
<span class="n">DecimalType</span><span class="p">,</span>
<span class="n">DoubleType</span><span class="p">,</span>
<span class="n">FloatType</span><span class="p">,</span>
<span class="n">IntegerType</span><span class="p">,</span>
<span class="n">IntegralType</span><span class="p">,</span>
<span class="n">LongType</span><span class="p">,</span>
<span class="n">NumericType</span><span class="p">,</span>
<span class="n">Row</span><span class="p">,</span>
<span class="n">StructType</span><span class="p">,</span>
<span class="n">TimestampType</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark.sql.window</span> <span class="kn">import</span> <span class="n">Window</span>
<span class="kn">from</span> <span class="nn">pyspark</span> <span class="kn">import</span> <span class="n">pandas</span> <span class="k">as</span> <span class="n">ps</span> <span class="c1"># For running doctests and reference resolution in PyCharm.</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas._typing</span> <span class="kn">import</span> <span class="n">Axis</span><span class="p">,</span> <span class="n">Dtype</span><span class="p">,</span> <span class="n">Label</span><span class="p">,</span> <span class="n">Name</span><span class="p">,</span> <span class="n">Scalar</span><span class="p">,</span> <span class="n">T</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.accessors</span> <span class="kn">import</span> <span class="n">PandasOnSparkSeriesMethods</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.categorical</span> <span class="kn">import</span> <span class="n">CategoricalAccessor</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.config</span> <span class="kn">import</span> <span class="n">get_option</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.base</span> <span class="kn">import</span> <span class="n">IndexOpsMixin</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.exceptions</span> <span class="kn">import</span> <span class="n">SparkPandasIndexingError</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.frame</span> <span class="kn">import</span> <span class="n">DataFrame</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.generic</span> <span class="kn">import</span> <span class="n">Frame</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.internal</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">InternalField</span><span class="p">,</span>
<span class="n">InternalFrame</span><span class="p">,</span>
<span class="n">DEFAULT_SERIES_NAME</span><span class="p">,</span>
<span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">,</span>
<span class="n">SPARK_DEFAULT_INDEX_NAME</span><span class="p">,</span>
<span class="n">SPARK_DEFAULT_SERIES_NAME</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.missing.series</span> <span class="kn">import</span> <span class="n">MissingPandasLikeSeries</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.plot</span> <span class="kn">import</span> <span class="n">PandasOnSparkPlotAccessor</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.ml</span> <span class="kn">import</span> <span class="n">corr</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.utils</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">combine_frames</span><span class="p">,</span>
<span class="n">is_name_like_tuple</span><span class="p">,</span>
<span class="n">is_name_like_value</span><span class="p">,</span>
<span class="n">name_like_string</span><span class="p">,</span>
<span class="n">same_anchor</span><span class="p">,</span>
<span class="n">scol_for</span><span class="p">,</span>
<span class="n">sql_conf</span><span class="p">,</span>
<span class="n">validate_arguments_and_invoke_function</span><span class="p">,</span>
<span class="n">validate_axis</span><span class="p">,</span>
<span class="n">validate_bool_kwarg</span><span class="p">,</span>
<span class="n">verify_temp_column_name</span><span class="p">,</span>
<span class="n">SPARK_CONF_ARROW_ENABLED</span><span class="p">,</span>
<span class="n">log_advice</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.datetimes</span> <span class="kn">import</span> <span class="n">DatetimeMethods</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.spark</span> <span class="kn">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">SF</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.spark.accessors</span> <span class="kn">import</span> <span class="n">SparkSeriesMethods</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.strings</span> <span class="kn">import</span> <span class="n">StringMethods</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.typedef</span> <span class="kn">import</span> <span class="p">(</span>
<span class="n">infer_return_type</span><span class="p">,</span>
<span class="n">spark_type_to_pandas_dtype</span><span class="p">,</span>
<span class="n">ScalarType</span><span class="p">,</span>
<span class="n">SeriesType</span><span class="p">,</span>
<span class="n">create_type_for_series_type</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">TYPE_CHECKING</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.sql._typing</span> <span class="kn">import</span> <span class="n">ColumnOrName</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.groupby</span> <span class="kn">import</span> <span class="n">SeriesGroupBy</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.indexes</span> <span class="kn">import</span> <span class="n">Index</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.spark.accessors</span> <span class="kn">import</span> <span class="n">SparkIndexOpsMethods</span>
<span class="c1"># This regular expression pattern is compiled and defined here to avoid compiling the same</span>
<span class="c1"># pattern every time it is used in _repr_ in Series.</span>
<span class="c1"># This pattern basically seeks the footer string from pandas&#39;</span>
<span class="n">REPR_PATTERN</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">&quot;Length: (?P&lt;length&gt;[0-9]+)&quot;</span><span class="p">)</span>
<span class="n">_flex_doc_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Return </span><span class="si">{desc}</span><span class="s2"> of series and other, element-wise (binary operator `</span><span class="si">{op_name}</span><span class="s2">`).</span>
<span class="s2">Equivalent to ``</span><span class="si">{equiv}</span><span class="s2">``</span>
<span class="s2">Parameters</span>
<span class="s2">----------</span>
<span class="s2">other : Series or scalar value</span>
<span class="s2">Returns</span>
<span class="s2">-------</span>
<span class="s2">Series</span>
<span class="s2"> The result of the operation.</span>
<span class="s2">See Also</span>
<span class="s2">--------</span>
<span class="s2">Series.</span><span class="si">{reverse}</span><span class="s2"></span>
<span class="si">{series_examples}</span><span class="s2"></span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_add_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.add(df.b)</span>
<span class="s2">a 4.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 6.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.radd(df.b)</span>
<span class="s2">a 4.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 6.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_sub_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.subtract(df.b)</span>
<span class="s2">a 0.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 2.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.rsub(df.b)</span>
<span class="s2">a 0.0</span>
<span class="s2">b NaN</span>
<span class="s2">c -2.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_mul_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.multiply(df.b)</span>
<span class="s2">a 4.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 8.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.rmul(df.b)</span>
<span class="s2">a 4.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 8.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_div_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.divide(df.b)</span>
<span class="s2">a 1.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 2.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.rdiv(df.b)</span>
<span class="s2">a 1.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 0.5</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_pow_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.pow(df.b)</span>
<span class="s2">a 4.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 16.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.rpow(df.b)</span>
<span class="s2">a 4.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 16.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_mod_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.mod(df.b)</span>
<span class="s2">a 0.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 0.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.rmod(df.b)</span>
<span class="s2">a 0.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 2.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="n">_floordiv_example_SERIES</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">Examples</span>
<span class="s2">--------</span>
<span class="s2">&gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [2, 2, 4, np.nan],</span>
<span class="s2">... &#39;b&#39;: [2, np.nan, 2, np.nan]},</span>
<span class="s2">... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="s2">&gt;&gt;&gt; df</span>
<span class="s2"> a b</span>
<span class="s2">a 2.0 2.0</span>
<span class="s2">b 2.0 NaN</span>
<span class="s2">c 4.0 2.0</span>
<span class="s2">d NaN NaN</span>
<span class="s2">&gt;&gt;&gt; df.a.floordiv(df.b)</span>
<span class="s2">a 1.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 2.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&gt;&gt;&gt; df.a.rfloordiv(df.b)</span>
<span class="s2">a 1.0</span>
<span class="s2">b NaN</span>
<span class="s2">c 0.0</span>
<span class="s2">d NaN</span>
<span class="s2">dtype: float64</span>
<span class="s2">&quot;&quot;&quot;</span>
<span class="c1"># Needed to disambiguate Series.str and str type</span>
<span class="n">str_type</span> <span class="o">=</span> <span class="nb">str</span>
<div class="viewcode-block" id="Series"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.html#pyspark.pandas.Series">[docs]</a><span class="k">class</span> <span class="nc">Series</span><span class="p">(</span><span class="n">Frame</span><span class="p">,</span> <span class="n">IndexOpsMixin</span><span class="p">,</span> <span class="n">Generic</span><span class="p">[</span><span class="n">T</span><span class="p">]):</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> pandas-on-Spark Series that corresponds to pandas Series logically. This holds Spark Column</span>
<span class="sd"> internally.</span>
<span class="sd"> :ivar _internal: an internal immutable Frame to manage metadata.</span>
<span class="sd"> :type _internal: InternalFrame</span>
<span class="sd"> :ivar _psdf: Parent&#39;s pandas-on-Spark DataFrame</span>
<span class="sd"> :type _psdf: ps.DataFrame</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> data : array-like, dict, or scalar value, pandas Series</span>
<span class="sd"> Contains data stored in Series</span>
<span class="sd"> Note that if `data` is a pandas Series, other arguments should not be used.</span>
<span class="sd"> index : array-like or Index (1d)</span>
<span class="sd"> Values must be hashable and have the same length as `data`.</span>
<span class="sd"> Non-unique index values are allowed. Will default to</span>
<span class="sd"> RangeIndex (0, 1, 2, ..., n) if not provided. If both a dict and index</span>
<span class="sd"> sequence are used, the index will override the keys found in the</span>
<span class="sd"> dict.</span>
<span class="sd"> dtype : numpy.dtype or None</span>
<span class="sd"> If None, dtype will be inferred</span>
<span class="sd"> copy : boolean, default False</span>
<span class="sd"> Copy input data</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span> <span class="c1"># type: ignore[no-untyped-def]</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">fastpath</span><span class="o">=</span><span class="kc">False</span>
<span class="p">):</span>
<span class="k">assert</span> <span class="n">data</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_anchor</span><span class="p">:</span> <span class="n">DataFrame</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_col_label</span><span class="p">:</span> <span class="n">Label</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">):</span>
<span class="k">assert</span> <span class="n">dtype</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="k">assert</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="k">assert</span> <span class="ow">not</span> <span class="n">copy</span>
<span class="k">assert</span> <span class="ow">not</span> <span class="n">fastpath</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_anchor</span> <span class="o">=</span> <span class="n">data</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_col_label</span> <span class="o">=</span> <span class="n">index</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span>
<span class="k">assert</span> <span class="n">index</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="k">assert</span> <span class="n">dtype</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="k">assert</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="k">assert</span> <span class="ow">not</span> <span class="n">copy</span>
<span class="k">assert</span> <span class="ow">not</span> <span class="n">fastpath</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">data</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span>
<span class="n">data</span><span class="o">=</span><span class="n">data</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">dtype</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="n">copy</span><span class="p">,</span> <span class="n">fastpath</span><span class="o">=</span><span class="n">fastpath</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="o">.</span><span class="n">from_pandas</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">s</span><span class="p">))</span>
<span class="k">if</span> <span class="n">s</span><span class="o">.</span><span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">column_labels</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">])</span>
<span class="n">anchor</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_anchor</span> <span class="o">=</span> <span class="n">anchor</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_col_label</span> <span class="o">=</span> <span class="n">anchor</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="nb">object</span><span class="o">.</span><span class="fm">__setattr__</span><span class="p">(</span><span class="n">anchor</span><span class="p">,</span> <span class="s2">&quot;_psseries&quot;</span><span class="p">,</span> <span class="p">{</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">:</span> <span class="bp">self</span><span class="p">})</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">_psdf</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_anchor</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">_internal</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">InternalFrame</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">select_column</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">_column_label</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Label</span><span class="p">]:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_col_label</span>
<span class="k">def</span> <span class="nf">_update_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">psdf</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">assert</span> <span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span> <span class="o">==</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">],</span> <span class="p">(</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">,</span>
<span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">],</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_anchor</span> <span class="o">=</span> <span class="n">psdf</span>
<span class="nb">object</span><span class="o">.</span><span class="fm">__setattr__</span><span class="p">(</span><span class="n">psdf</span><span class="p">,</span> <span class="s2">&quot;_psseries&quot;</span><span class="p">,</span> <span class="p">{</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">:</span> <span class="bp">self</span><span class="p">})</span>
<span class="k">def</span> <span class="nf">_with_new_scol</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">scol</span><span class="p">:</span> <span class="n">Column</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">field</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">InternalField</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Copy pandas-on-Spark Series with the new Spark Column.</span>
<span class="sd"> :param scol: the new Spark Column</span>
<span class="sd"> :return: the copied Series</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">name</span> <span class="o">=</span> <span class="n">name_like_string</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name</span><span class="p">)],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span>
<span class="n">field</span> <span class="k">if</span> <span class="n">field</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">or</span> <span class="n">field</span><span class="o">.</span><span class="n">struct_field</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">field</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">)</span>
<span class="p">],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="n">spark</span><span class="p">:</span> <span class="s2">&quot;SparkIndexOpsMethods&quot;</span> <span class="o">=</span> <span class="n">CachedAccessor</span><span class="p">(</span> <span class="c1"># type: ignore[assignment]</span>
<span class="s2">&quot;spark&quot;</span><span class="p">,</span> <span class="n">SparkSeriesMethods</span>
<span class="p">)</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">dtypes</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Dtype</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;Return the dtype object of the underlying data.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(list(&#39;abc&#39;))</span>
<span class="sd"> &gt;&gt;&gt; s.dtype == s.dtypes</span>
<span class="sd"> True</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">axes</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="s2">&quot;Index&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a list of the row axis labels.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; psser.axes</span>
<span class="sd"> [Int64Index([0, 1, 2], dtype=&#39;int64&#39;)]</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">]</span>
<span class="c1"># Arithmetic Operators</span>
<div class="viewcode-block" id="Series.add"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.add.html#pyspark.pandas.Series.add">[docs]</a> <span class="k">def</span> <span class="nf">add</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">+</span> <span class="n">other</span></div>
<span class="n">add</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Addition&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;+&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series + other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;radd&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_add_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.radd"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.radd.html#pyspark.pandas.Series.radd">[docs]</a> <span class="k">def</span> <span class="nf">radd</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">+</span> <span class="bp">self</span></div>
<span class="n">radd</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Addition&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;+&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other + series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;add&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_add_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.div"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.div.html#pyspark.pandas.Series.div">[docs]</a> <span class="k">def</span> <span class="nf">div</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">/</span> <span class="n">other</span></div>
<span class="n">div</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Floating division&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;/&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series / other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rdiv&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_div_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">divide</span> <span class="o">=</span> <span class="n">div</span>
<div class="viewcode-block" id="Series.rdiv"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rdiv.html#pyspark.pandas.Series.rdiv">[docs]</a> <span class="k">def</span> <span class="nf">rdiv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">/</span> <span class="bp">self</span></div>
<span class="n">rdiv</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Floating division&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;/&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other / series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;div&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_div_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.truediv"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.truediv.html#pyspark.pandas.Series.truediv">[docs]</a> <span class="k">def</span> <span class="nf">truediv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">/</span> <span class="n">other</span></div>
<span class="n">truediv</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Floating division&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;/&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series / other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rtruediv&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_div_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.rtruediv"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rtruediv.html#pyspark.pandas.Series.rtruediv">[docs]</a> <span class="k">def</span> <span class="nf">rtruediv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">/</span> <span class="bp">self</span></div>
<span class="n">rtruediv</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Floating division&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;/&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other / series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;truediv&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_div_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.mul"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.mul.html#pyspark.pandas.Series.mul">[docs]</a> <span class="k">def</span> <span class="nf">mul</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">*</span> <span class="n">other</span></div>
<span class="n">mul</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Multiplication&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;*&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series * other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rmul&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_mul_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">multiply</span> <span class="o">=</span> <span class="n">mul</span>
<div class="viewcode-block" id="Series.rmul"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rmul.html#pyspark.pandas.Series.rmul">[docs]</a> <span class="k">def</span> <span class="nf">rmul</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">*</span> <span class="bp">self</span></div>
<span class="n">rmul</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Multiplication&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;*&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other * series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;mul&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_mul_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.sub"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.sub.html#pyspark.pandas.Series.sub">[docs]</a> <span class="k">def</span> <span class="nf">sub</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">-</span> <span class="n">other</span></div>
<span class="n">sub</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Subtraction&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;-&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series - other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rsub&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_sub_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">subtract</span> <span class="o">=</span> <span class="n">sub</span>
<div class="viewcode-block" id="Series.rsub"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rsub.html#pyspark.pandas.Series.rsub">[docs]</a> <span class="k">def</span> <span class="nf">rsub</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">-</span> <span class="bp">self</span></div>
<span class="n">rsub</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Subtraction&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;-&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other - series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;sub&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_sub_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.mod"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.mod.html#pyspark.pandas.Series.mod">[docs]</a> <span class="k">def</span> <span class="nf">mod</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">%</span> <span class="n">other</span></div>
<span class="n">mod</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Modulo&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;%&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series % other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rmod&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_mod_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.rmod"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rmod.html#pyspark.pandas.Series.rmod">[docs]</a> <span class="k">def</span> <span class="nf">rmod</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">%</span> <span class="bp">self</span></div>
<span class="n">rmod</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Modulo&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;%&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other % series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;mod&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_mod_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.pow"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.pow.html#pyspark.pandas.Series.pow">[docs]</a> <span class="k">def</span> <span class="nf">pow</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">**</span> <span class="n">other</span></div>
<span class="n">pow</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Exponential power of series&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;**&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series ** other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rpow&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_pow_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.rpow"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rpow.html#pyspark.pandas.Series.rpow">[docs]</a> <span class="k">def</span> <span class="nf">rpow</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">**</span> <span class="bp">self</span></div>
<span class="n">rpow</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Exponential power&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;**&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other ** series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;pow&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_pow_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.floordiv"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.floordiv.html#pyspark.pandas.Series.floordiv">[docs]</a> <span class="k">def</span> <span class="nf">floordiv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">//</span> <span class="n">other</span></div>
<span class="n">floordiv</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Integer division&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;//&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;series // other&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;rfloordiv&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_floordiv_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<div class="viewcode-block" id="Series.rfloordiv"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rfloordiv.html#pyspark.pandas.Series.rfloordiv">[docs]</a> <span class="k">def</span> <span class="nf">rfloordiv</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">other</span> <span class="o">//</span> <span class="bp">self</span></div>
<span class="n">rfloordiv</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">_flex_doc_SERIES</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">desc</span><span class="o">=</span><span class="s2">&quot;Reverse Integer division&quot;</span><span class="p">,</span>
<span class="n">op_name</span><span class="o">=</span><span class="s2">&quot;//&quot;</span><span class="p">,</span>
<span class="n">equiv</span><span class="o">=</span><span class="s2">&quot;other // series&quot;</span><span class="p">,</span>
<span class="n">reverse</span><span class="o">=</span><span class="s2">&quot;floordiv&quot;</span><span class="p">,</span>
<span class="n">series_examples</span><span class="o">=</span><span class="n">_floordiv_example_SERIES</span><span class="p">,</span>
<span class="p">)</span>
<span class="c1"># create accessor for pandas-on-Spark specific methods.</span>
<span class="n">pandas_on_spark</span> <span class="o">=</span> <span class="n">CachedAccessor</span><span class="p">(</span><span class="s2">&quot;pandas_on_spark&quot;</span><span class="p">,</span> <span class="n">PandasOnSparkSeriesMethods</span><span class="p">)</span>
<span class="c1"># keep the name &quot;koalas&quot; for backward compatibility.</span>
<span class="n">koalas</span> <span class="o">=</span> <span class="n">CachedAccessor</span><span class="p">(</span><span class="s2">&quot;koalas&quot;</span><span class="p">,</span> <span class="n">PandasOnSparkSeriesMethods</span><span class="p">)</span>
<span class="c1"># Comparison Operators</span>
<div class="viewcode-block" id="Series.eq"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.eq.html#pyspark.pandas.Series.eq">[docs]</a> <span class="k">def</span> <span class="nf">eq</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare if the current value is equal to the other.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4],</span>
<span class="sd"> ... &#39;b&#39;: [1, np.nan, 1, np.nan]},</span>
<span class="sd"> ... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df.a == 1</span>
<span class="sd"> a True</span>
<span class="sd"> b False</span>
<span class="sd"> c False</span>
<span class="sd"> d False</span>
<span class="sd"> Name: a, dtype: bool</span>
<span class="sd"> &gt;&gt;&gt; df.b.eq(1)</span>
<span class="sd"> a True</span>
<span class="sd"> b False</span>
<span class="sd"> c True</span>
<span class="sd"> d False</span>
<span class="sd"> Name: b, dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">==</span> <span class="n">other</span></div>
<span class="n">equals</span> <span class="o">=</span> <span class="n">eq</span>
<div class="viewcode-block" id="Series.gt"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.gt.html#pyspark.pandas.Series.gt">[docs]</a> <span class="k">def</span> <span class="nf">gt</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare if the current value is greater than the other.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4],</span>
<span class="sd"> ... &#39;b&#39;: [1, np.nan, 1, np.nan]},</span>
<span class="sd"> ... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df.a &gt; 1</span>
<span class="sd"> a False</span>
<span class="sd"> b True</span>
<span class="sd"> c True</span>
<span class="sd"> d True</span>
<span class="sd"> Name: a, dtype: bool</span>
<span class="sd"> &gt;&gt;&gt; df.b.gt(1)</span>
<span class="sd"> a False</span>
<span class="sd"> b False</span>
<span class="sd"> c False</span>
<span class="sd"> d False</span>
<span class="sd"> Name: b, dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">&gt;</span> <span class="n">other</span></div>
<div class="viewcode-block" id="Series.ge"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.ge.html#pyspark.pandas.Series.ge">[docs]</a> <span class="k">def</span> <span class="nf">ge</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare if the current value is greater than or equal to the other.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4],</span>
<span class="sd"> ... &#39;b&#39;: [1, np.nan, 1, np.nan]},</span>
<span class="sd"> ... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df.a &gt;= 2</span>
<span class="sd"> a False</span>
<span class="sd"> b True</span>
<span class="sd"> c True</span>
<span class="sd"> d True</span>
<span class="sd"> Name: a, dtype: bool</span>
<span class="sd"> &gt;&gt;&gt; df.b.ge(2)</span>
<span class="sd"> a False</span>
<span class="sd"> b False</span>
<span class="sd"> c False</span>
<span class="sd"> d False</span>
<span class="sd"> Name: b, dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">&gt;=</span> <span class="n">other</span></div>
<div class="viewcode-block" id="Series.lt"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.lt.html#pyspark.pandas.Series.lt">[docs]</a> <span class="k">def</span> <span class="nf">lt</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare if the current value is less than the other.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4],</span>
<span class="sd"> ... &#39;b&#39;: [1, np.nan, 1, np.nan]},</span>
<span class="sd"> ... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df.a &lt; 1</span>
<span class="sd"> a False</span>
<span class="sd"> b False</span>
<span class="sd"> c False</span>
<span class="sd"> d False</span>
<span class="sd"> Name: a, dtype: bool</span>
<span class="sd"> &gt;&gt;&gt; df.b.lt(2)</span>
<span class="sd"> a True</span>
<span class="sd"> b False</span>
<span class="sd"> c True</span>
<span class="sd"> d False</span>
<span class="sd"> Name: b, dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">&lt;</span> <span class="n">other</span></div>
<div class="viewcode-block" id="Series.le"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.le.html#pyspark.pandas.Series.le">[docs]</a> <span class="k">def</span> <span class="nf">le</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare if the current value is less than or equal to the other.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4],</span>
<span class="sd"> ... &#39;b&#39;: [1, np.nan, 1, np.nan]},</span>
<span class="sd"> ... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df.a &lt;= 2</span>
<span class="sd"> a True</span>
<span class="sd"> b True</span>
<span class="sd"> c False</span>
<span class="sd"> d False</span>
<span class="sd"> Name: a, dtype: bool</span>
<span class="sd"> &gt;&gt;&gt; df.b.le(2)</span>
<span class="sd"> a True</span>
<span class="sd"> b False</span>
<span class="sd"> c True</span>
<span class="sd"> d False</span>
<span class="sd"> Name: b, dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">&lt;=</span> <span class="n">other</span></div>
<div class="viewcode-block" id="Series.ne"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.ne.html#pyspark.pandas.Series.ne">[docs]</a> <span class="k">def</span> <span class="nf">ne</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare if the current value is not equal to the other.</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4],</span>
<span class="sd"> ... &#39;b&#39;: [1, np.nan, 1, np.nan]},</span>
<span class="sd"> ... index=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], columns=[&#39;a&#39;, &#39;b&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df.a != 1</span>
<span class="sd"> a False</span>
<span class="sd"> b True</span>
<span class="sd"> c True</span>
<span class="sd"> d True</span>
<span class="sd"> Name: a, dtype: bool</span>
<span class="sd"> &gt;&gt;&gt; df.b.ne(1)</span>
<span class="sd"> a False</span>
<span class="sd"> b True</span>
<span class="sd"> c False</span>
<span class="sd"> d True</span>
<span class="sd"> Name: b, dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span> <span class="o">!=</span> <span class="n">other</span></div>
<div class="viewcode-block" id="Series.divmod"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.divmod.html#pyspark.pandas.Series.divmod">[docs]</a> <span class="k">def</span> <span class="nf">divmod</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return Integer division and modulo of series and other, element-wise</span>
<span class="sd"> (binary operator `divmod`).</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series or scalar value</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> 2-Tuple of Series</span>
<span class="sd"> The result of the operation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.rdivmod</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">floordiv</span><span class="p">(</span><span class="n">other</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">mod</span><span class="p">(</span><span class="n">other</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.rdivmod"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rdivmod.html#pyspark.pandas.Series.rdivmod">[docs]</a> <span class="k">def</span> <span class="nf">rdivmod</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return Integer division and modulo of series and other, element-wise</span>
<span class="sd"> (binary operator `rdivmod`).</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series or scalar value</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> 2-Tuple of Series</span>
<span class="sd"> The result of the operation.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.divmod</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">rfloordiv</span><span class="p">(</span><span class="n">other</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">rmod</span><span class="p">(</span><span class="n">other</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.between"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.between.html#pyspark.pandas.Series.between">[docs]</a> <span class="k">def</span> <span class="nf">between</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">left</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="n">right</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="n">inclusive</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return boolean Series equivalent to left &lt;= series &lt;= right.</span>
<span class="sd"> This function returns a boolean vector containing `True` wherever the</span>
<span class="sd"> corresponding Series element is between the boundary values `left` and</span>
<span class="sd"> `right`. NA values are treated as `False`.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> left : scalar or list-like</span>
<span class="sd"> Left boundary.</span>
<span class="sd"> right : scalar or list-like</span>
<span class="sd"> Right boundary.</span>
<span class="sd"> inclusive : bool, default True</span>
<span class="sd"> Include boundaries.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series representing whether each element is between left and</span>
<span class="sd"> right (inclusive).</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.gt : Greater than of series and other.</span>
<span class="sd"> Series.lt : Less than of series and other.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> With the default ``inclusive=True``, this function is equivalent to ``(left &lt;= ser) &amp; (ser &lt;= right)``.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([2, 0, 4, 8, np.nan])</span>
<span class="sd"> Boundary values are included by default:</span>
<span class="sd"> &gt;&gt;&gt; s.between(1, 4)</span>
<span class="sd"> 0 True</span>
<span class="sd"> 1 False</span>
<span class="sd"> 2 True</span>
<span class="sd"> 3 False</span>
<span class="sd"> 4 False</span>
<span class="sd"> dtype: bool</span>
<span class="sd"> With `inclusive` set to ``False`` boundary values are excluded:</span>
<span class="sd"> &gt;&gt;&gt; s.between(1, 4, inclusive=False)</span>
<span class="sd"> 0 True</span>
<span class="sd"> 1 False</span>
<span class="sd"> 2 False</span>
<span class="sd"> 3 False</span>
<span class="sd"> 4 False</span>
<span class="sd"> dtype: bool</span>
<span class="sd"> `left` and `right` can be any scalar value:</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;Alice&#39;, &#39;Bob&#39;, &#39;Carol&#39;, &#39;Eve&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s.between(&#39;Anna&#39;, &#39;Daniel&#39;)</span>
<span class="sd"> 0 False</span>
<span class="sd"> 1 True</span>
<span class="sd"> 2 True</span>
<span class="sd"> 3 False</span>
<span class="sd"> dtype: bool</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">inclusive</span><span class="p">:</span>
<span class="n">lmask</span> <span class="o">=</span> <span class="bp">self</span> <span class="o">&gt;=</span> <span class="n">left</span>
<span class="n">rmask</span> <span class="o">=</span> <span class="bp">self</span> <span class="o">&lt;=</span> <span class="n">right</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">lmask</span> <span class="o">=</span> <span class="bp">self</span> <span class="o">&gt;</span> <span class="n">left</span>
<span class="n">rmask</span> <span class="o">=</span> <span class="bp">self</span> <span class="o">&lt;</span> <span class="n">right</span>
<span class="k">return</span> <span class="n">lmask</span> <span class="o">&amp;</span> <span class="n">rmask</span></div>
<div class="viewcode-block" id="Series.cov"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.cov.html#pyspark.pandas.Series.cov">[docs]</a> <span class="k">def</span> <span class="nf">cov</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">min_periods</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compute covariance with Series, excluding missing values.</span>
<span class="sd"> .. versionadded:: 3.3.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series</span>
<span class="sd"> Series with which to compute the covariance.</span>
<span class="sd"> min_periods : int, optional</span>
<span class="sd"> Minimum number of observations needed to have a valid result.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> float</span>
<span class="sd"> Covariance between Series and other</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.pandas.config import set_option, reset_option</span>
<span class="sd"> &gt;&gt;&gt; set_option(&quot;compute.ops_on_diff_frames&quot;, True)</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([0.90010907, 0.13484424, 0.62036035])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([0.12528585, 0.26962463, 0.51111198])</span>
<span class="sd"> &gt;&gt;&gt; s1.cov(s2)</span>
<span class="sd"> -0.016857626527158744</span>
<span class="sd"> &gt;&gt;&gt; reset_option(&quot;compute.ops_on_diff_frames&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Series</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;unsupported type: </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">other</span><span class="p">))</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">np</span><span class="o">.</span><span class="n">issubdtype</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">number</span><span class="p">):</span> <span class="c1"># type: ignore[arg-type]</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;unsupported dtype: </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">np</span><span class="o">.</span><span class="n">issubdtype</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">number</span><span class="p">):</span> <span class="c1"># type: ignore[arg-type]</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;unsupported dtype: </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">other</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
<span class="n">min_periods</span> <span class="o">=</span> <span class="mi">1</span> <span class="k">if</span> <span class="n">min_periods</span> <span class="ow">is</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">min_periods</span>
<span class="k">if</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">combine_frames</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">(),</span> <span class="n">other</span><span class="o">.</span><span class="n">to_frame</span><span class="p">())</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_columns</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">dropna</span><span class="p">()</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sdf</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="n">min_periods</span><span class="p">))</span> <span class="o">&lt;</span> <span class="n">min_periods</span><span class="p">:</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">covar_samp</span><span class="p">(</span><span class="o">*</span><span class="n">sdf</span><span class="o">.</span><span class="n">columns</span><span class="p">))</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">1</span><span class="p">)[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span></div>
<span class="c1"># TODO: NaN and None when ``arg`` is an empty dict</span>
<span class="c1"># TODO: Support ps.Series ``arg``</span>
<div class="viewcode-block" id="Series.map"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.map.html#pyspark.pandas.Series.map">[docs]</a> <span class="k">def</span> <span class="nf">map</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">arg</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Dict</span><span class="p">,</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Any</span><span class="p">],</span> <span class="n">Any</span><span class="p">],</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">],</span> <span class="n">na_action</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Map values of Series according to input correspondence.</span>
<span class="sd"> Used for substituting each value in a Series with another value,</span>
<span class="sd"> that may be derived from a function, a ``dict`` or a ``pd.Series``.</span>
<span class="sd"> .. note:: make sure the size of the dictionary is not huge because it could</span>
<span class="sd"> degrade the performance or throw OutOfMemoryError due to a huge</span>
<span class="sd"> expression within Spark. Consider passing a function as the input</span>
<span class="sd"> instead in this case.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> arg : function, dict or pd.Series</span>
<span class="sd"> Mapping correspondence.</span>
<span class="sd"> na_action : str, optional</span>
<span class="sd"> If `ignore`, propagate NA values, without passing them to the mapping correspondence.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Same index as caller.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.apply : For applying more complex functions on a Series.</span>
<span class="sd"> DataFrame.applymap : Apply a function elementwise on a whole DataFrame.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> When ``arg`` is a dictionary, values in Series that are not in the</span>
<span class="sd"> dictionary (as keys) are converted to ``None``. However, if the</span>
<span class="sd"> dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.</span>
<span class="sd"> provides a method for default values), then this default is used</span>
<span class="sd"> rather than ``None``.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;cat&#39;, &#39;dog&#39;, None, &#39;rabbit&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 cat</span>
<span class="sd"> 1 dog</span>
<span class="sd"> 2 None</span>
<span class="sd"> 3 rabbit</span>
<span class="sd"> dtype: object</span>
<span class="sd"> ``map`` accepts a ``dict``. Values that are not found</span>
<span class="sd"> in the ``dict`` are converted to ``None``, unless the dict has a default</span>
<span class="sd"> value (e.g. ``defaultdict``):</span>
<span class="sd"> &gt;&gt;&gt; s.map({&#39;cat&#39;: &#39;kitten&#39;, &#39;dog&#39;: &#39;puppy&#39;})</span>
<span class="sd"> 0 kitten</span>
<span class="sd"> 1 puppy</span>
<span class="sd"> 2 None</span>
<span class="sd"> 3 None</span>
<span class="sd"> dtype: object</span>
<span class="sd"> It also accepts a pandas Series:</span>
<span class="sd"> &gt;&gt;&gt; pser = pd.Series([&#39;kitten&#39;, &#39;puppy&#39;], index=[&#39;cat&#39;, &#39;dog&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s.map(pser)</span>
<span class="sd"> 0 kitten</span>
<span class="sd"> 1 puppy</span>
<span class="sd"> 2 None</span>
<span class="sd"> 3 None</span>
<span class="sd"> dtype: object</span>
<span class="sd"> It also accepts a function:</span>
<span class="sd"> &gt;&gt;&gt; def format(x) -&gt; str:</span>
<span class="sd"> ... return &#39;I am a {}&#39;.format(x)</span>
<span class="sd"> &gt;&gt;&gt; s.map(format)</span>
<span class="sd"> 0 I am a cat</span>
<span class="sd"> 1 I am a dog</span>
<span class="sd"> 2 I am a None</span>
<span class="sd"> 3 I am a rabbit</span>
<span class="sd"> dtype: object</span>
<span class="sd"> To avoid applying the function to missing values (and keep them as ``None``)</span>
<span class="sd"> na_action=&#39;ignore&#39; can be used:</span>
<span class="sd"> &gt;&gt;&gt; s.map(&#39;I am a {}&#39;.format, na_action=&#39;ignore&#39;)</span>
<span class="sd"> 0 I am a cat</span>
<span class="sd"> 1 I am a dog</span>
<span class="sd"> 2 None</span>
<span class="sd"> 3 I am a rabbit</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="p">(</span><span class="nb">dict</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">)):</span>
<span class="n">is_start</span> <span class="o">=</span> <span class="kc">True</span>
<span class="c1"># In case dictionary is empty.</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">False</span><span class="p">),</span> <span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">))</span>
<span class="k">for</span> <span class="n">to_replace</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">arg</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="k">if</span> <span class="n">is_start</span><span class="p">:</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="o">==</span> <span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">to_replace</span><span class="p">),</span> <span class="n">value</span><span class="p">)</span>
<span class="n">is_start</span> <span class="o">=</span> <span class="kc">False</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">current</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="o">==</span> <span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">to_replace</span><span class="p">),</span> <span class="n">value</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="s2">&quot;__missing__&quot;</span><span class="p">):</span>
<span class="n">tmp_val</span> <span class="o">=</span> <span class="n">arg</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">_NoValue</span><span class="p">]</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="c1"># Remove in case it&#39;s set in defaultdict.</span>
<span class="k">del</span> <span class="n">arg</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">_NoValue</span><span class="p">]</span> <span class="c1"># type: ignore[attr-defined]</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">current</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">tmp_val</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">current</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">)</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">current</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">pandas_on_spark</span><span class="o">.</span><span class="n">transform_batch</span><span class="p">(</span><span class="k">lambda</span> <span class="n">pser</span><span class="p">:</span> <span class="n">pser</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">arg</span><span class="p">,</span> <span class="n">na_action</span><span class="p">))</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">shape</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="nb">int</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;Return a tuple of the shape of the underlying data.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="p">),)</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">name</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Name</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;Return name of the Series.&quot;&quot;&quot;</span>
<span class="n">name</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span>
<span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">name</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">return</span> <span class="n">name</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">name</span>
<span class="nd">@name</span><span class="o">.</span><span class="n">setter</span>
<span class="k">def</span> <span class="nf">name</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="n">Name</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="c1"># TODO: Changing index labels with a dictionary or Series is not supported yet.</span>
<div class="viewcode-block" id="Series.rename"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rename.html#pyspark.pandas.Series.rename">[docs]</a> <span class="k">def</span> <span class="nf">rename</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Any</span><span class="p">],</span> <span class="n">Any</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Alter Series index labels or name.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> index : scalar or function, optional</span>
<span class="sd"> Functions are transformations to apply to the index.</span>
<span class="sd"> Scalar will alter the Series.name attribute.</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> If True, rename this Series in place instead of returning a new one.</span>
<span class="sd"> Not supported when `index` is a function.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with index labels or name altered.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rename(&quot;my_name&quot;) # scalar, changes Series.name</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> Name: my_name, dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rename(lambda x: x ** 2) # function, changes labels</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 4 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">index</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">pass</span>
<span class="k">if</span> <span class="n">callable</span><span class="p">(</span><span class="n">index</span><span class="p">):</span>
<span class="k">if</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;inplace&quot;</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;inplace True is not supported yet for a function &#39;index&#39;&quot;</span><span class="p">)</span>
<span class="n">frame</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span>
<span class="n">new_index_name</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">frame</span><span class="p">,</span> <span class="s2">&quot;__index_name__&quot;</span><span class="p">)</span>
<span class="n">frame</span><span class="p">[</span><span class="n">new_index_name</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
<span class="n">frame</span><span class="o">.</span><span class="n">set_index</span><span class="p">(</span><span class="n">new_index_name</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">frame</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">name</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">frame</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">index</span><span class="p">,</span> <span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;&#39;index&#39; of </span><span class="si">%s</span><span class="s2"> type is not supported yet&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">index</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
<span class="k">elif</span> <span class="ow">not</span> <span class="n">is_hashable</span><span class="p">(</span><span class="n">index</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;Series.name must be a hashable type&quot;</span><span class="p">)</span>
<span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">index</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">):</span>
<span class="n">index</span> <span class="o">=</span> <span class="p">(</span><span class="n">index</span><span class="p">,)</span>
<span class="n">name</span> <span class="o">=</span> <span class="n">name_like_string</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
<span class="n">field</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">column_labels</span><span class="o">=</span><span class="p">[</span><span class="n">index</span><span class="p">],</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol</span><span class="p">],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="n">field</span><span class="p">],</span>
<span class="n">column_label_names</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">psdf</span><span class="p">:</span> <span class="n">DataFrame</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">)</span>
<span class="k">if</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;inplace&quot;</span><span class="p">,</span> <span class="kc">False</span><span class="p">):</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_col_label</span> <span class="o">=</span> <span class="n">index</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.rename_axis"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rename_axis.html#pyspark.pandas.Series.rename_axis">[docs]</a> <span class="k">def</span> <span class="nf">rename_axis</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">mapper</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">index</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Set the name of the axis for the index or columns.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> mapper, index : scalar, list-like, dict-like or function, optional</span>
<span class="sd"> A scalar, list-like, dict-like or function transformation to</span>
<span class="sd"> apply to the index level names.</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> Modifies the object directly, instead of creating a new Series.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series, or None if `inplace` is True.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.rename : Alter Series index labels or name.</span>
<span class="sd"> DataFrame.rename : Alter DataFrame index labels or name.</span>
<span class="sd"> Index.rename : Set new names on index.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&quot;dog&quot;, &quot;cat&quot;, &quot;monkey&quot;], name=&quot;animal&quot;)</span>
<span class="sd"> &gt;&gt;&gt; s # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> 0 dog</span>
<span class="sd"> 1 cat</span>
<span class="sd"> 2 monkey</span>
<span class="sd"> Name: animal, dtype: object</span>
<span class="sd"> &gt;&gt;&gt; s.rename_axis(&quot;index&quot;).sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> index</span>
<span class="sd"> 0 dog</span>
<span class="sd"> 1 cat</span>
<span class="sd"> 2 monkey</span>
<span class="sd"> Name: animal, dtype: object</span>
<span class="sd"> **MultiIndex**</span>
<span class="sd"> &gt;&gt;&gt; index = pd.MultiIndex.from_product([[&#39;mammal&#39;],</span>
<span class="sd"> ... [&#39;dog&#39;, &#39;cat&#39;, &#39;monkey&#39;]],</span>
<span class="sd"> ... names=[&#39;type&#39;, &#39;name&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([4, 4, 2], index=index, name=&#39;num_legs&#39;)</span>
<span class="sd"> &gt;&gt;&gt; s # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> type name</span>
<span class="sd"> mammal dog 4</span>
<span class="sd"> cat 4</span>
<span class="sd"> monkey 2</span>
<span class="sd"> Name: num_legs, dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rename_axis(index={&#39;type&#39;: &#39;class&#39;}).sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> class name</span>
<span class="sd"> mammal cat 4</span>
<span class="sd"> dog 4</span>
<span class="sd"> monkey 2</span>
<span class="sd"> Name: num_legs, dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rename_axis(index=str.upper).sort_index() # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd"> TYPE NAME</span>
<span class="sd"> mammal cat 4</span>
<span class="sd"> dog 4</span>
<span class="sd"> monkey 2</span>
<span class="sd"> Name: num_legs, dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">(</span><span class="n">mapper</span><span class="o">=</span><span class="n">mapper</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span></div>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">index</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;ps.Index&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;The index (axis labels) of the Series.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Index</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">index</span>
<span class="nd">@property</span>
<span class="k">def</span> <span class="nf">is_unique</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return True if the values in the object are unique, False otherwise.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> is_unique : boolean</span>
<span class="sd"> &gt;&gt;&gt; ps.Series([1, 2, 3]).is_unique</span>
<span class="sd"> True</span>
<span class="sd"> &gt;&gt;&gt; ps.Series([1, 2, 2]).is_unique</span>
<span class="sd"> False</span>
<span class="sd"> &gt;&gt;&gt; ps.Series([1, 2, 3, None]).is_unique</span>
<span class="sd"> True</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="c1"># Here we check:</span>
<span class="c1"># 1. that the count of non-null values equals the distinct count of non-null values</span>
<span class="c1"># 2. that there is at most one null value, since null counts as a single distinct value.</span>
<span class="c1">#</span>
<span class="c1"># This workaround checks uniqueness, including nulls, in a</span>
<span class="c1"># single pass. Note that COUNT(DISTINCT expr) in Spark is designed to ignore nulls.</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span> <span class="o">==</span> <span class="n">F</span><span class="o">.</span><span class="n">countDistinct</span><span class="p">(</span><span class="n">scol</span><span class="p">))</span>
<span class="o">&amp;</span> <span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="kc">None</span><span class="p">))</span> <span class="o">&lt;=</span> <span class="mi">1</span><span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">collect</span><span class="p">()[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span>
<div class="viewcode-block" id="Series.reset_index"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.reset_index.html#pyspark.pandas.Series.reset_index">[docs]</a> <span class="k">def</span> <span class="nf">reset_index</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">level</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Name</span><span class="p">,</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Name</span><span class="p">]]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">drop</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Name</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">]]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Generate a new DataFrame or Series with the index reset.</span>
<span class="sd"> This is useful when the index needs to be treated as a column,</span>
<span class="sd"> or when the index is meaningless and needs to be reset</span>
<span class="sd"> to the default before another operation.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> level : int, str, tuple, or list, optional</span>
<span class="sd"> For a Series with a MultiIndex, only remove the specified levels from the index.</span>
<span class="sd"> Removes all levels by default.</span>
<span class="sd"> drop : bool, default False</span>
<span class="sd"> Just reset the index, without inserting it as a column in the new DataFrame.</span>
<span class="sd"> name : object, optional</span>
<span class="sd"> The name to use for the column containing the original Series values.</span>
<span class="sd"> Uses self.name by default. This argument is ignored when drop is True.</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> Modify the Series in place (do not create a new object).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series or DataFrame</span>
<span class="sd"> When `drop` is False (the default), a DataFrame is returned.</span>
<span class="sd"> The newly created columns will come first in the DataFrame,</span>
<span class="sd"> followed by the original Series values.</span>
<span class="sd"> When `drop` is True, a `Series` is returned.</span>
<span class="sd"> If ``inplace=True`` (which requires ``drop=True``), no value is returned.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4], index=pd.Index([&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;], name=&#39;idx&#39;))</span>
<span class="sd"> Generate a DataFrame with default index.</span>
<span class="sd"> &gt;&gt;&gt; s.reset_index()</span>
<span class="sd"> idx 0</span>
<span class="sd"> 0 a 1</span>
<span class="sd"> 1 b 2</span>
<span class="sd"> 2 c 3</span>
<span class="sd"> 3 d 4</span>
<span class="sd"> To specify the name of the new column use `name`.</span>
<span class="sd"> &gt;&gt;&gt; s.reset_index(name=&#39;values&#39;)</span>
<span class="sd"> idx values</span>
<span class="sd"> 0 a 1</span>
<span class="sd"> 1 b 2</span>
<span class="sd"> 2 c 3</span>
<span class="sd"> 3 d 4</span>
<span class="sd"> To generate a new Series with the default index, set `drop` to True.</span>
<span class="sd"> &gt;&gt;&gt; s.reset_index(drop=True)</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> To update the Series in place, without generating a new one</span>
<span class="sd"> set `inplace` to True. Note that it also requires ``drop=True``.</span>
<span class="sd"> &gt;&gt;&gt; s.reset_index(inplace=True, drop=True)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">inplace</span> <span class="o">=</span> <span class="n">validate_bool_kwarg</span><span class="p">(</span><span class="n">inplace</span><span class="p">,</span> <span class="s2">&quot;inplace&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">drop</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;Cannot reset_index inplace on a Series to create a DataFrame&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">drop</span><span class="p">:</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">psser</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">psser</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="n">psdf</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="n">level</span><span class="p">,</span> <span class="n">drop</span><span class="o">=</span><span class="n">drop</span><span class="p">)</span>
<span class="k">if</span> <span class="n">drop</span><span class="p">:</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">psdf</span></div>
<div class="viewcode-block" id="Series.to_frame"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_frame.html#pyspark.pandas.Series.to_frame">[docs]</a> <span class="k">def</span> <span class="nf">to_frame</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Name</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Convert Series to DataFrame.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> name : object, default None</span>
<span class="sd"> The passed name should substitute for the series name (if it has</span>
<span class="sd"> one).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> DataFrame</span>
<span class="sd"> DataFrame representation of Series.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])</span>
<span class="sd"> &gt;&gt;&gt; s.to_frame()</span>
<span class="sd"> 0</span>
<span class="sd"> 0 a</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 c</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;], name=&quot;vals&quot;)</span>
<span class="sd"> &gt;&gt;&gt; s.to_frame()</span>
<span class="sd"> vals</span>
<span class="sd"> 0 a</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 c</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">renamed</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
<span class="k">elif</span> <span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">renamed</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">DEFAULT_SERIES_NAME</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">renamed</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">renamed</span><span class="o">.</span><span class="n">_internal</span><span class="p">)</span></div>
<span class="n">to_dataframe</span> <span class="o">=</span> <span class="n">to_frame</span>
<div class="viewcode-block" id="Series.to_string"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_string.html#pyspark.pandas.Series.to_string">[docs]</a> <span class="k">def</span> <span class="nf">to_string</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">buf</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">IO</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">na_rep</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;NaN&quot;</span><span class="p">,</span>
<span class="n">float_format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Callable</span><span class="p">[[</span><span class="nb">float</span><span class="p">],</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">header</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">index</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">length</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">dtype</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">name</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">max_rows</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Render a string representation of the Series.</span>
<span class="sd"> .. note:: This method should only be used if the resulting pandas object is expected</span>
<span class="sd"> to be small, as all the data is loaded into the driver&#39;s memory. If the input</span>
<span class="sd"> is large, set the max_rows parameter.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> buf : StringIO-like, optional</span>
<span class="sd"> buffer to write to</span>
<span class="sd"> na_rep : string, optional</span>
<span class="sd"> string representation of NaN to use, default &#39;NaN&#39;</span>
<span class="sd"> float_format : one-parameter function, optional</span>
<span class="sd"> formatter function to apply to columns&#39; elements if they are floats</span>
<span class="sd"> default None</span>
<span class="sd"> header : boolean, default True</span>
<span class="sd"> Add the Series header (index name)</span>
<span class="sd"> index : bool, optional</span>
<span class="sd"> Add index (row) labels, default True</span>
<span class="sd"> length : boolean, default False</span>
<span class="sd"> Add the Series length</span>
<span class="sd"> dtype : boolean, default False</span>
<span class="sd"> Add the Series dtype</span>
<span class="sd"> name : boolean, default False</span>
<span class="sd"> Add the Series name if not None</span>
<span class="sd"> max_rows : int, optional</span>
<span class="sd"> Maximum number of rows to show before truncating. If None, show</span>
<span class="sd"> all.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> formatted : string (if no buffer is passed)</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)], columns=[&#39;dogs&#39;, &#39;cats&#39;])</span>
<span class="sd"> &gt;&gt;&gt; print(df[&#39;dogs&#39;].to_string())</span>
<span class="sd"> 0 0.2</span>
<span class="sd"> 1 0.0</span>
<span class="sd"> 2 0.6</span>
<span class="sd"> 3 0.2</span>
<span class="sd"> &gt;&gt;&gt; print(df[&#39;dogs&#39;].to_string(max_rows=2))</span>
<span class="sd"> 0 0.2</span>
<span class="sd"> 1 0.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Make sure locals() call is at the top of the function so we don&#39;t capture local variables.</span>
<span class="n">args</span> <span class="o">=</span> <span class="nb">locals</span><span class="p">()</span>
<span class="k">if</span> <span class="n">max_rows</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">psseries</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="n">max_rows</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">psseries</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">return</span> <span class="n">validate_arguments_and_invoke_function</span><span class="p">(</span>
<span class="n">psseries</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_string</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="o">.</span><span class="n">to_string</span><span class="p">,</span> <span class="n">args</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="Series.to_clipboard"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_clipboard.html#pyspark.pandas.Series.to_clipboard">[docs]</a> <span class="k">def</span> <span class="nf">to_clipboard</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">excel</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">sep</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># Docstring defined below by reusing DataFrame.to_clipboard&#39;s.</span>
<span class="n">args</span> <span class="o">=</span> <span class="nb">locals</span><span class="p">()</span>
<span class="n">psseries</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">return</span> <span class="n">validate_arguments_and_invoke_function</span><span class="p">(</span>
<span class="n">psseries</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_clipboard</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="o">.</span><span class="n">to_clipboard</span><span class="p">,</span> <span class="n">args</span>
<span class="p">)</span></div>
<span class="n">to_clipboard</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="o">.</span><span class="n">to_clipboard</span><span class="o">.</span><span class="vm">__doc__</span>
<div class="viewcode-block" id="Series.to_dict"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_dict.html#pyspark.pandas.Series.to_dict">[docs]</a> <span class="k">def</span> <span class="nf">to_dict</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">into</span><span class="p">:</span> <span class="n">Type</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Mapping</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Convert Series to {label -&gt; value} dict or dict-like object.</span>
<span class="sd"> .. note:: This method should only be used if the resulting mapping is expected</span>
<span class="sd"> to be small, as all the data is loaded into the driver&#39;s memory.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> into : class, default dict</span>
<span class="sd"> The collections.abc.Mapping subclass to use as the return</span>
<span class="sd"> object. Can be the actual class or an empty</span>
<span class="sd"> instance of the mapping type you want. If you want a</span>
<span class="sd"> collections.defaultdict, you must pass it initialized.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> collections.abc.Mapping</span>
<span class="sd"> Key-value representation of Series.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s_dict = s.to_dict()</span>
<span class="sd"> &gt;&gt;&gt; sorted(s_dict.items())</span>
<span class="sd"> [(0, 1), (1, 2), (2, 3), (3, 4)]</span>
<span class="sd"> &gt;&gt;&gt; from collections import OrderedDict, defaultdict</span>
<span class="sd"> &gt;&gt;&gt; s.to_dict(OrderedDict)</span>
<span class="sd"> OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])</span>
<span class="sd"> &gt;&gt;&gt; dd = defaultdict(list)</span>
<span class="sd"> &gt;&gt;&gt; s.to_dict(dd) # doctest: +ELLIPSIS</span>
<span class="sd"> defaultdict(&lt;class &#39;list&#39;&gt;, {...})</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Make sure locals() call is at the top of the function so we don&#39;t capture local variables.</span>
<span class="n">args</span> <span class="o">=</span> <span class="nb">locals</span><span class="p">()</span>
<span class="n">psseries</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">return</span> <span class="n">validate_arguments_and_invoke_function</span><span class="p">(</span>
<span class="n">psseries</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_dict</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="o">.</span><span class="n">to_dict</span><span class="p">,</span> <span class="n">args</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="Series.to_latex"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_latex.html#pyspark.pandas.Series.to_latex">[docs]</a> <span class="k">def</span> <span class="nf">to_latex</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">buf</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">IO</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">columns</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">Name</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">col_space</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">header</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">index</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">na_rep</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;NaN&quot;</span><span class="p">,</span>
<span class="n">formatters</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span>
<span class="n">Union</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="n">Callable</span><span class="p">[[</span><span class="n">Any</span><span class="p">],</span> <span class="nb">str</span><span class="p">]],</span> <span class="n">Dict</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Any</span><span class="p">],</span> <span class="nb">str</span><span class="p">]]]</span>
<span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">float_format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Callable</span><span class="p">[[</span><span class="nb">float</span><span class="p">],</span> <span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">sparsify</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">index_names</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">bold_rows</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">column_format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">longtable</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">escape</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">encoding</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">decimal</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;.&quot;</span><span class="p">,</span>
<span class="n">multicolumn</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">multicolumn_format</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">multirow</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="n">args</span> <span class="o">=</span> <span class="nb">locals</span><span class="p">()</span>
<span class="n">psseries</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">return</span> <span class="n">validate_arguments_and_invoke_function</span><span class="p">(</span>
<span class="n">psseries</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">(),</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_latex</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="o">.</span><span class="n">to_latex</span><span class="p">,</span> <span class="n">args</span>
<span class="p">)</span></div>
<span class="n">to_latex</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="o">.</span><span class="n">to_latex</span><span class="o">.</span><span class="vm">__doc__</span>
<div class="viewcode-block" id="Series.to_pandas"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_pandas.html#pyspark.pandas.Series.to_pandas">[docs]</a> <span class="k">def</span> <span class="nf">to_pandas</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a pandas Series.</span>
<span class="sd"> .. note:: This method should only be used if the resulting pandas object is expected</span>
<span class="sd"> to be small, as all the data is loaded into the driver&#39;s memory.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)], columns=[&#39;dogs&#39;, &#39;cats&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df[&#39;dogs&#39;].to_pandas()</span>
<span class="sd"> 0 0.2</span>
<span class="sd"> 1 0.0</span>
<span class="sd"> 2 0.6</span>
<span class="sd"> 3 0.2</span>
<span class="sd"> Name: dogs, dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">log_advice</span><span class="p">(</span>
<span class="s2">&quot;`to_pandas` loads all data into the driver&#39;s memory. &quot;</span>
<span class="s2">&quot;It should only be used if the resulting pandas Series is expected to be small.&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_to_pandas</span><span class="p">()</span></div>
<span class="k">def</span> <span class="nf">_to_pandas</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Same as `to_pandas()`, without issuing the advice log, for internal usage.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<div class="viewcode-block" id="Series.to_list"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.to_list.html#pyspark.pandas.Series.to_list">[docs]</a> <span class="k">def</span> <span class="nf">to_list</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a list of the values.</span>
<span class="sd"> These are each a scalar type, which is a Python scalar</span>
<span class="sd"> (for str, int, float) or a pandas scalar</span>
<span class="sd"> (for Timestamp/Timedelta/Interval/Period)</span>
<span class="sd"> .. note:: This method should only be used if the resulting list is expected</span>
<span class="sd"> to be small, as all the data is loaded into the driver&#39;s memory.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">log_advice</span><span class="p">(</span>
<span class="s2">&quot;`to_list` loads all data into the driver&#39;s memory. &quot;</span>
<span class="s2">&quot;It should only be used if the resulting list is expected to be small.&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span></div>
<span class="n">tolist</span> <span class="o">=</span> <span class="n">to_list</span>
<div class="viewcode-block" id="Series.drop_duplicates"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.drop_duplicates.html#pyspark.pandas.Series.drop_duplicates">[docs]</a> <span class="k">def</span> <span class="nf">drop_duplicates</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">keep</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bool</span><span class="p">,</span> <span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="s2">&quot;first&quot;</span><span class="p">,</span> <span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return Series with duplicate values removed.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> keep : {&#39;first&#39;, &#39;last&#39;, ``False``}, default &#39;first&#39;</span>
<span class="sd"> Method to handle dropping duplicates:</span>
<span class="sd"> - &#39;first&#39; : Drop duplicates except for the first occurrence.</span>
<span class="sd"> - &#39;last&#39; : Drop duplicates except for the last occurrence.</span>
<span class="sd"> - ``False`` : Drop all duplicates.</span>
<span class="sd"> inplace : bool, default ``False``</span>
<span class="sd"> If ``True``, performs operation inplace and returns None.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with duplicates dropped.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Generate a Series with duplicated entries.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;lama&#39;, &#39;cow&#39;, &#39;lama&#39;, &#39;beetle&#39;, &#39;lama&#39;, &#39;hippo&#39;],</span>
<span class="sd"> ... name=&#39;animal&#39;)</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 0 lama</span>
<span class="sd"> 1 cow</span>
<span class="sd"> 2 lama</span>
<span class="sd"> 3 beetle</span>
<span class="sd"> 4 lama</span>
<span class="sd"> 5 hippo</span>
<span class="sd"> Name: animal, dtype: object</span>
<span class="sd"> With the &#39;keep&#39; parameter, the selection behaviour of duplicated values</span>
<span class="sd"> can be changed. The value &#39;first&#39; keeps the first occurrence for each</span>
<span class="sd"> set of duplicated entries. The default value of keep is &#39;first&#39;.</span>
<span class="sd"> &gt;&gt;&gt; s.drop_duplicates().sort_index()</span>
<span class="sd"> 0 lama</span>
<span class="sd"> 1 cow</span>
<span class="sd"> 3 beetle</span>
<span class="sd"> 5 hippo</span>
<span class="sd"> Name: animal, dtype: object</span>
<span class="sd"> The value &#39;last&#39; for parameter &#39;keep&#39; keeps the last occurrence for</span>
<span class="sd"> each set of duplicated entries.</span>
<span class="sd"> &gt;&gt;&gt; s.drop_duplicates(keep=&#39;last&#39;).sort_index()</span>
<span class="sd"> 1 cow</span>
<span class="sd"> 3 beetle</span>
<span class="sd"> 4 lama</span>
<span class="sd"> 5 hippo</span>
<span class="sd"> Name: animal, dtype: object</span>
<span class="sd"> The value ``False`` for parameter &#39;keep&#39; discards all sets of</span>
<span class="sd"> duplicated entries. Setting the value of &#39;inplace&#39; to ``True`` performs</span>
<span class="sd"> the operation inplace and returns ``None``.</span>
<span class="sd"> &gt;&gt;&gt; s.drop_duplicates(keep=False, inplace=True)</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 1 cow</span>
<span class="sd"> 3 beetle</span>
<span class="sd"> 5 hippo</span>
<span class="sd"> Name: animal, dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">inplace</span> <span class="o">=</span> <span class="n">validate_bool_kwarg</span><span class="p">(</span><span class="n">inplace</span><span class="p">,</span> <span class="s2">&quot;inplace&quot;</span><span class="p">)</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]]</span><span class="o">.</span><span class="n">drop_duplicates</span><span class="p">(</span><span class="n">keep</span><span class="o">=</span><span class="n">keep</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.reindex"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.reindex.html#pyspark.pandas.Series.reindex">[docs]</a> <span class="k">def</span> <span class="nf">reindex</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">index</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">fill_value</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Conform Series to new index with optional filling logic, placing</span>
<span class="sd"> NA/NaN in locations having no value in the previous index. A new object</span>
<span class="sd"> is produced.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> index : array-like, optional</span>
<span class="sd"> New labels / index to conform to, should be specified using keywords.</span>
<span class="sd"> Preferably an Index object to avoid duplicating data.</span>
<span class="sd"> fill_value : scalar, default np.NaN</span>
<span class="sd"> Value to use for missing values. Defaults to NaN, but can be any</span>
<span class="sd"> &quot;compatible&quot; value.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series with changed index.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.reset_index : Remove row labels or move them to new columns.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Create a series with some fictional data.</span>
<span class="sd"> &gt;&gt;&gt; index = [&#39;Firefox&#39;, &#39;Chrome&#39;, &#39;Safari&#39;, &#39;IE10&#39;, &#39;Konqueror&#39;]</span>
<span class="sd"> &gt;&gt;&gt; ser = ps.Series([200, 200, 404, 404, 301],</span>
<span class="sd"> ... index=index, name=&#39;http_status&#39;)</span>
<span class="sd"> &gt;&gt;&gt; ser</span>
<span class="sd"> Firefox 200</span>
<span class="sd"> Chrome 200</span>
<span class="sd"> Safari 404</span>
<span class="sd"> IE10 404</span>
<span class="sd"> Konqueror 301</span>
<span class="sd"> Name: http_status, dtype: int64</span>
<span class="sd"> Create a new index and reindex the Series. By default</span>
<span class="sd"> values in the new index that do not have corresponding</span>
<span class="sd"> records in the Series are assigned ``NaN``.</span>
<span class="sd"> &gt;&gt;&gt; new_index= [&#39;Safari&#39;, &#39;Iceweasel&#39;, &#39;Comodo Dragon&#39;, &#39;IE10&#39;,</span>
<span class="sd"> ... &#39;Chrome&#39;]</span>
<span class="sd"> &gt;&gt;&gt; ser.reindex(new_index).sort_index()</span>
<span class="sd"> Chrome 200.0</span>
<span class="sd"> Comodo Dragon NaN</span>
<span class="sd"> IE10 404.0</span>
<span class="sd"> Iceweasel NaN</span>
<span class="sd"> Safari 404.0</span>
<span class="sd"> Name: http_status, dtype: float64</span>
<span class="sd"> We can fill in the missing values by passing a value to</span>
<span class="sd"> the keyword ``fill_value``.</span>
<span class="sd"> &gt;&gt;&gt; ser.reindex(new_index, fill_value=0).sort_index()</span>
<span class="sd"> Chrome 200</span>
<span class="sd"> Comodo Dragon 0</span>
<span class="sd"> IE10 404</span>
<span class="sd"> Iceweasel 0</span>
<span class="sd"> Safari 404</span>
<span class="sd"> Name: http_status, dtype: int64</span>
<span class="sd"> To further illustrate the filling functionality in</span>
<span class="sd"> ``reindex``, we will create a Series with a</span>
<span class="sd"> monotonically increasing index (for example, a sequence</span>
<span class="sd"> of dates).</span>
<span class="sd"> &gt;&gt;&gt; date_index = pd.date_range(&#39;1/1/2010&#39;, periods=6, freq=&#39;D&#39;)</span>
<span class="sd"> &gt;&gt;&gt; ser2 = ps.Series([100, 101, np.nan, 100, 89, 88],</span>
<span class="sd"> ... name=&#39;prices&#39;, index=date_index)</span>
<span class="sd"> &gt;&gt;&gt; ser2.sort_index()</span>
<span class="sd"> 2010-01-01 100.0</span>
<span class="sd"> 2010-01-02 101.0</span>
<span class="sd"> 2010-01-03 NaN</span>
<span class="sd"> 2010-01-04 100.0</span>
<span class="sd"> 2010-01-05 89.0</span>
<span class="sd"> 2010-01-06 88.0</span>
<span class="sd"> Name: prices, dtype: float64</span>
<span class="sd"> Suppose we decide to expand the series to cover a wider</span>
<span class="sd"> date range.</span>
<span class="sd"> &gt;&gt;&gt; date_index2 = pd.date_range(&#39;12/29/2009&#39;, periods=10, freq=&#39;D&#39;)</span>
<span class="sd"> &gt;&gt;&gt; ser2.reindex(date_index2).sort_index()</span>
<span class="sd"> 2009-12-29 NaN</span>
<span class="sd"> 2009-12-30 NaN</span>
<span class="sd"> 2009-12-31 NaN</span>
<span class="sd"> 2010-01-01 100.0</span>
<span class="sd"> 2010-01-02 101.0</span>
<span class="sd"> 2010-01-03 NaN</span>
<span class="sd"> 2010-01-04 100.0</span>
<span class="sd"> 2010-01-05 89.0</span>
<span class="sd"> 2010-01-06 88.0</span>
<span class="sd"> 2010-01-07 NaN</span>
<span class="sd"> Name: prices, dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">reindex</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="n">fill_value</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">name</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="Series.reindex_like"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.reindex_like.html#pyspark.pandas.Series.reindex_like">[docs]</a> <span class="k">def</span> <span class="nf">reindex_like</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="s2">&quot;DataFrame&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a Series with matching indices as other object.</span>
<span class="sd"> Conform the object to the same index on all axes. Places NA/NaN in locations</span>
<span class="sd"> having no value in the previous index.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series or DataFrame</span>
<span class="sd"> Its row and column indices are used to define the new indices</span>
<span class="sd"> of this object.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with changed indices on each axis.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> DataFrame.set_index : Set row labels.</span>
<span class="sd"> DataFrame.reset_index : Remove row labels or move them to new columns.</span>
<span class="sd"> DataFrame.reindex : Change to new indices or expand indices.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Same as calling</span>
<span class="sd"> ``.reindex(index=other.index, ...)``.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([24.3, 31.0, 22.0, 35.0],</span>
<span class="sd"> ... index=pd.date_range(start=&#39;2014-02-12&#39;,</span>
<span class="sd"> ... end=&#39;2014-02-15&#39;, freq=&#39;D&#39;),</span>
<span class="sd"> ... name=&quot;temp_celsius&quot;)</span>
<span class="sd"> &gt;&gt;&gt; s1</span>
<span class="sd"> 2014-02-12 24.3</span>
<span class="sd"> 2014-02-13 31.0</span>
<span class="sd"> 2014-02-14 22.0</span>
<span class="sd"> 2014-02-15 35.0</span>
<span class="sd"> Name: temp_celsius, dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([&quot;low&quot;, &quot;low&quot;, &quot;medium&quot;],</span>
<span class="sd"> ... index=pd.DatetimeIndex([&#39;2014-02-12&#39;, &#39;2014-02-13&#39;,</span>
<span class="sd"> ... &#39;2014-02-15&#39;]),</span>
<span class="sd"> ... name=&quot;winspeed&quot;)</span>
<span class="sd"> &gt;&gt;&gt; s2</span>
<span class="sd"> 2014-02-12 low</span>
<span class="sd"> 2014-02-13 low</span>
<span class="sd"> 2014-02-15 medium</span>
<span class="sd"> Name: winspeed, dtype: object</span>
<span class="sd"> &gt;&gt;&gt; s2.reindex_like(s1).sort_index()</span>
<span class="sd"> 2014-02-12 low</span>
<span class="sd"> 2014-02-13 low</span>
<span class="sd"> 2014-02-14 None</span>
<span class="sd"> 2014-02-15 medium</span>
<span class="sd"> Name: winspeed, dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="p">(</span><span class="n">Series</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">)):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">reindex</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="n">other</span><span class="o">.</span><span class="n">index</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;other must be a pandas-on-Spark Series or DataFrame&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.fillna"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.fillna.html#pyspark.pandas.Series.fillna">[docs]</a> <span class="k">def</span> <span class="nf">fillna</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">value</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">method</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;Fill NA/NaN values.</span>
<span class="sd"> .. note:: the current implementation of the &#39;method&#39; parameter in fillna uses Spark&#39;s Window</span>
<span class="sd"> without specifying a partition specification. This moves all data into</span>
<span class="sd"> a single partition on a single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method with very large datasets.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> value : scalar, dict, Series</span>
<span class="sd"> Value to use to fill holes. Alternately, a dict/Series of values</span>
<span class="sd"> specifying which value to use for each column.</span>
<span class="sd"> DataFrame is not supported.</span>
<span class="sd"> method : {&#39;backfill&#39;, &#39;bfill&#39;, &#39;pad&#39;, &#39;ffill&#39;, None}, default None</span>
<span class="sd"> Method to use for filling holes in reindexed Series. pad / ffill: propagate last valid</span>
<span class="sd"> observation forward to next valid. backfill / bfill:</span>
<span class="sd"> use NEXT valid observation to fill gap.</span>
<span class="sd"> axis : {0 or `index`}</span>
<span class="sd"> 1 and `columns` are not supported.</span>
<span class="sd"> inplace : boolean, default False</span>
<span class="sd"> Fill in place (do not create a new object)</span>
<span class="sd"> limit : int, default None</span>
<span class="sd"> If method is specified, this is the maximum number of consecutive NaN values to</span>
<span class="sd"> forward/backward fill. In other words, if there is a gap with more than this number of</span>
<span class="sd"> consecutive NaNs, it will only be partially filled. If method is not specified,</span>
<span class="sd"> this is the maximum number of entries along the entire axis where NaNs will be filled.</span>
<span class="sd"> Must be greater than 0 if not None.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with NA entries filled.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([np.nan, 2, 3, 4, np.nan, 6], name=&#39;x&#39;)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 4 NaN</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> Name: x, dtype: float64</span>
<span class="sd"> Replace all NaN elements with 0s.</span>
<span class="sd"> &gt;&gt;&gt; s.fillna(0)</span>
<span class="sd"> 0 0.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 4 0.0</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> Name: x, dtype: float64</span>
<span class="sd"> We can also propagate non-null values forward or backward.</span>
<span class="sd"> &gt;&gt;&gt; s.fillna(method=&#39;ffill&#39;)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 4 4.0</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> Name: x, dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([np.nan, &#39;a&#39;, &#39;b&#39;, &#39;c&#39;, np.nan], name=&#39;x&#39;)</span>
<span class="sd"> &gt;&gt;&gt; s.fillna(method=&#39;ffill&#39;)</span>
<span class="sd"> 0 None</span>
<span class="sd"> 1 a</span>
<span class="sd"> 2 b</span>
<span class="sd"> 3 c</span>
<span class="sd"> 4 c</span>
<span class="sd"> Name: x, dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">psser</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">value</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="n">method</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="n">limit</span><span class="p">)</span>
<span class="k">if</span> <span class="n">method</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">psser</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span><span class="p">)</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">inplace</span> <span class="o">=</span> <span class="n">validate_bool_kwarg</span><span class="p">(</span><span class="n">inplace</span><span class="p">,</span> <span class="s2">&quot;inplace&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_update_internal_frame</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="p">,</span> <span class="n">requires_same_anchor</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">psser</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span></div>
<span class="k">def</span> <span class="nf">_fillna</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">value</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">method</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">limit</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">part_cols</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">(),</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span>
<span class="k">if</span> <span class="n">axis</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;fillna currently only works for axis=0 or axis=&#39;index&#39;&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="p">(</span><span class="n">value</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="n">method</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Must specify a fillna &#39;value&#39; or &#39;method&#39; parameter.&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="p">(</span><span class="n">method</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">)</span> <span class="ow">and</span> <span class="p">(</span><span class="n">method</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;ffill&quot;</span><span class="p">,</span> <span class="s2">&quot;pad&quot;</span><span class="p">,</span> <span class="s2">&quot;backfill&quot;</span><span class="p">,</span> <span class="s2">&quot;bfill&quot;</span><span class="p">]):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Expecting &#39;pad&#39;, &#39;ffill&#39;, &#39;backfill&#39; or &#39;bfill&#39;.&quot;</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">nullable</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="p">(</span><span class="n">FloatType</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">)</span>
<span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">cond</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">isnull</span><span class="p">()</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="k">if</span> <span class="n">value</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="p">(</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">bool</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;Unsupported type </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
<span class="k">if</span> <span class="n">limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;limit parameter for value is not support now&quot;</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">method</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;ffill&quot;</span><span class="p">,</span> <span class="s2">&quot;pad&quot;</span><span class="p">]:</span>
<span class="n">func</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">last</span>
<span class="n">end</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span> <span class="o">-</span> <span class="mi">1</span>
<span class="k">if</span> <span class="n">limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">begin</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span> <span class="o">-</span> <span class="n">limit</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">begin</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span>
<span class="k">elif</span> <span class="n">method</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;bfill&quot;</span><span class="p">,</span> <span class="s2">&quot;backfill&quot;</span><span class="p">]:</span>
<span class="n">func</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">first</span>
<span class="n">begin</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span> <span class="o">+</span> <span class="mi">1</span>
<span class="k">if</span> <span class="n">limit</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">end</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span> <span class="o">+</span> <span class="n">limit</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">end</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">unboundedFollowing</span>
<span class="n">window</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">begin</span><span class="p">,</span> <span class="n">end</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="n">func</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_spark_column</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">,</span> <span class="n">scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name_like_string</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">))</span> <span class="c1"># TODO: dtype?</span>
<span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<div class="viewcode-block" id="Series.dropna"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.dropna.html#pyspark.pandas.Series.dropna">[docs]</a> <span class="k">def</span> <span class="nf">dropna</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a new Series with missing values removed.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> axis : {0 or &#39;index&#39;}, default 0</span>
<span class="sd"> There is only one axis to drop values from.</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> If True, do operation inplace and return None.</span>
<span class="sd"> **kwargs</span>
<span class="sd"> Not in use.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with NA entries dropped from it.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; ser = ps.Series([1., 2., np.nan])</span>
<span class="sd"> &gt;&gt;&gt; ser</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Drop NA values from a Series.</span>
<span class="sd"> &gt;&gt;&gt; ser.dropna()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Keep the Series with valid entries in the same variable.</span>
<span class="sd"> &gt;&gt;&gt; ser.dropna(inplace=True)</span>
<span class="sd"> &gt;&gt;&gt; ser</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">inplace</span> <span class="o">=</span> <span class="n">validate_bool_kwarg</span><span class="p">(</span><span class="n">inplace</span><span class="p">,</span> <span class="s2">&quot;inplace&quot;</span><span class="p">)</span>
<span class="c1"># TODO: last two examples from pandas produce different results.</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]]</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.clip"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.clip.html#pyspark.pandas.Series.clip">[docs]</a> <span class="k">def</span> <span class="nf">clip</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">lower</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span> <span class="n">upper</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Trim values at input threshold(s).</span>
<span class="sd"> Assigns values outside boundary to boundary values.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> lower : float or int, default None</span>
<span class="sd"> Minimum threshold value. All values below this threshold will be set to it.</span>
<span class="sd"> upper : float or int, default None</span>
<span class="sd"> Maximum threshold value. All values above this threshold will be set to it.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with the values outside the clip boundaries replaced.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; ps.Series([0, 2, 4]).clip(1, 3)</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> One difference between this implementation and pandas is that running</span>
<span class="sd"> `pd.Series([&#39;a&#39;, &#39;b&#39;]).clip(0, 1)` will crash with &quot;TypeError: &#39;&lt;=&#39; not supported between</span>
<span class="sd"> instances of &#39;str&#39; and &#39;int&#39;&quot; while `ps.Series([&#39;a&#39;, &#39;b&#39;]).clip(0, 1)` will output the</span>
<span class="sd"> original Series, simply ignoring the incompatible types.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">is_list_like</span><span class="p">(</span><span class="n">lower</span><span class="p">)</span> <span class="ow">or</span> <span class="n">is_list_like</span><span class="p">(</span><span class="n">upper</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;List-like value are not supported for &#39;lower&#39; and &#39;upper&#39; at the &quot;</span> <span class="o">+</span> <span class="s2">&quot;moment&quot;</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">lower</span> <span class="ow">is</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">upper</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="k">if</span> <span class="n">lower</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">scol</span> <span class="o">&lt;</span> <span class="n">lower</span><span class="p">,</span> <span class="n">lower</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="k">if</span> <span class="n">upper</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">scol</span> <span class="o">&gt;</span> <span class="n">upper</span><span class="p">,</span> <span class="n">upper</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span>
<span class="n">scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]),</span>
<span class="n">field</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span></div>
<div class="viewcode-block" id="Series.drop"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.drop.html#pyspark.pandas.Series.drop">[docs]</a> <span class="k">def</span> <span class="nf">drop</span><span class="p">(</span>
    <span class="bp">self</span><span class="p">,</span>
    <span class="n">labels</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Name</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
    <span class="n">index</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Name</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
    <span class="n">level</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
    <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">    Return a Series without the rows whose index labels are given.</span>

<span class="sd">    Rows are selected for removal by their index labels. For a MultiIndex,</span>
<span class="sd">    `level` selects the index level the labels are matched against.</span>

<span class="sd">    Parameters</span>
<span class="sd">    ----------</span>
<span class="sd">    labels : single label or list-like</span>
<span class="sd">        Index labels to drop.</span>
<span class="sd">    index : single label or list-like, optional</span>
<span class="sd">        Alternative spelling of `labels`; redundant on a Series. Only one of</span>
<span class="sd">        `labels` and `index` may be given.</span>
<span class="sd">    level : int, optional</span>
<span class="sd">        For MultiIndex, the level for which the labels will be removed.</span>

<span class="sd">    Returns</span>
<span class="sd">    -------</span>
<span class="sd">    Series</span>
<span class="sd">        Series with the specified index labels removed.</span>

<span class="sd">    See Also</span>
<span class="sd">    --------</span>
<span class="sd">    Series.dropna</span>

<span class="sd">    Examples</span>
<span class="sd">    --------</span>
<span class="sd">    &gt;&gt;&gt; s = ps.Series(data=np.arange(3), index=[&#39;A&#39;, &#39;B&#39;, &#39;C&#39;])</span>
<span class="sd">    &gt;&gt;&gt; s</span>
<span class="sd">    A    0</span>
<span class="sd">    B    1</span>
<span class="sd">    C    2</span>
<span class="sd">    dtype: int64</span>

<span class="sd">    Drop the single label A:</span>

<span class="sd">    &gt;&gt;&gt; s.drop(&#39;A&#39;)</span>
<span class="sd">    B    1</span>
<span class="sd">    C    2</span>
<span class="sd">    dtype: int64</span>

<span class="sd">    Drop the labels B and C:</span>

<span class="sd">    &gt;&gt;&gt; s.drop(labels=[&#39;B&#39;, &#39;C&#39;])</span>
<span class="sd">    A    0</span>
<span class="sd">    dtype: int64</span>

<span class="sd">    Passing &#39;index&#39; instead of &#39;labels&#39; gives exactly the same result:</span>

<span class="sd">    &gt;&gt;&gt; s.drop(index=&#39;A&#39;)</span>
<span class="sd">    B    1</span>
<span class="sd">    C    2</span>
<span class="sd">    dtype: int64</span>

<span class="sd">    &gt;&gt;&gt; s.drop(index=[&#39;B&#39;, &#39;C&#39;])</span>
<span class="sd">    A    0</span>
<span class="sd">    dtype: int64</span>

<span class="sd">    MultiIndex is supported as well:</span>

<span class="sd">    &gt;&gt;&gt; midx = pd.MultiIndex([[&#39;lama&#39;, &#39;cow&#39;, &#39;falcon&#39;],</span>
<span class="sd">    ...                       [&#39;speed&#39;, &#39;weight&#39;, &#39;length&#39;]],</span>
<span class="sd">    ...                      [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd">    ...                       [0, 1, 2, 0, 1, 2, 0, 1, 2]])</span>
<span class="sd">    &gt;&gt;&gt; s = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],</span>
<span class="sd">    ...               index=midx)</span>
<span class="sd">    &gt;&gt;&gt; s</span>
<span class="sd">    lama    speed      45.0</span>
<span class="sd">            weight    200.0</span>
<span class="sd">            length      1.2</span>
<span class="sd">    cow     speed      30.0</span>
<span class="sd">            weight    250.0</span>
<span class="sd">            length      1.5</span>
<span class="sd">    falcon  speed     320.0</span>
<span class="sd">            weight      1.0</span>
<span class="sd">            length      0.3</span>
<span class="sd">    dtype: float64</span>

<span class="sd">    &gt;&gt;&gt; s.drop(labels=&#39;weight&#39;, level=1)</span>
<span class="sd">    lama    speed      45.0</span>
<span class="sd">            length      1.2</span>
<span class="sd">    cow     speed      30.0</span>
<span class="sd">            length      1.5</span>
<span class="sd">    falcon  speed     320.0</span>
<span class="sd">            length      0.3</span>
<span class="sd">    dtype: float64</span>

<span class="sd">    &gt;&gt;&gt; s.drop((&#39;lama&#39;, &#39;weight&#39;))</span>
<span class="sd">    lama    speed      45.0</span>
<span class="sd">            length      1.2</span>
<span class="sd">    cow     speed      30.0</span>
<span class="sd">            weight    250.0</span>
<span class="sd">            length      1.5</span>
<span class="sd">    falcon  speed     320.0</span>
<span class="sd">            weight      1.0</span>
<span class="sd">            length      0.3</span>
<span class="sd">    dtype: float64</span>

<span class="sd">    &gt;&gt;&gt; s.drop([(&#39;lama&#39;, &#39;speed&#39;), (&#39;falcon&#39;, &#39;weight&#39;)])</span>
<span class="sd">    lama    weight    200.0</span>
<span class="sd">            length      1.2</span>
<span class="sd">    cow     speed      30.0</span>
<span class="sd">            weight    250.0</span>
<span class="sd">            length      1.5</span>
<span class="sd">    falcon  speed     320.0</span>
<span class="sd">            length      0.3</span>
<span class="sd">    dtype: float64</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="c1"># The filtering itself lives in _drop, which hands back a DataFrame.</span>
    <span class="n">remaining_psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_drop</span><span class="p">(</span><span class="n">labels</span><span class="o">=</span><span class="n">labels</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="n">level</span><span class="p">)</span>
    <span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">remaining_psdf</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_drop</span><span class="p">(</span>
    <span class="bp">self</span><span class="p">,</span>
    <span class="n">labels</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Name</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
    <span class="n">index</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Name</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
    <span class="n">level</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
    <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">    Build the DataFrame left after removing rows with the given index labels.</span>

<span class="sd">    Parameters</span>
<span class="sd">    ----------</span>
<span class="sd">    labels : single label or list-like, optional</span>
<span class="sd">        Index labels to drop. Forwarded as `index`; cannot be combined with it.</span>
<span class="sd">    index : single label or list-like, optional</span>
<span class="sd">        Index labels to drop. A list must hold either only tuples or only</span>
<span class="sd">        non-tuple labels.</span>
<span class="sd">    level : int, optional</span>
<span class="sd">        Index level at which matching of each label starts. Defaults to 0.</span>

<span class="sd">    Returns</span>
<span class="sd">    -------</span>
<span class="sd">    DataFrame</span>
<span class="sd">        Frame filtered so that rows matching any of the labels are excluded.</span>
<span class="sd">        An empty list of labels yields the frame without any filter applied.</span>

<span class="sd">    Raises</span>
<span class="sd">    ------</span>
<span class="sd">    ValueError</span>
<span class="sd">        If both or neither of `labels` and `index` are given, if `level` is not</span>
<span class="sd">        less than the number of index levels, or if a list mixes tuple and</span>
<span class="sd">        non-tuple labels.</span>
<span class="sd">    KeyError</span>
<span class="sd">        If a label has more elements than there are index levels from `level` on.</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="k">if</span> <span class="n">labels</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
        <span class="k">if</span> <span class="n">index</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
            <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Cannot specify both &#39;labels&#39; and &#39;index&#39;&quot;</span><span class="p">)</span>
        <span class="c1"># &#39;labels&#39; is an alias of &#39;index&#39;; re-enter with the canonical argument.</span>
        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_drop</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="n">labels</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="n">level</span><span class="p">)</span>
    <span class="k">if</span> <span class="n">index</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
        <span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span>
        <span class="k">if</span> <span class="n">level</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
            <span class="n">level</span> <span class="o">=</span> <span class="mi">0</span>
        <span class="k">if</span> <span class="n">level</span> <span class="o">&gt;=</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_level</span><span class="p">:</span>
            <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;&#39;level&#39; should be less than the number of indexes&quot;</span><span class="p">)</span>

        <span class="c1"># Normalize every accepted input shape to a list of label tuples.</span>
        <span class="k">if</span> <span class="n">is_name_like_tuple</span><span class="p">(</span><span class="n">index</span><span class="p">):</span>
            <span class="n">index_list</span> <span class="o">=</span> <span class="p">[</span><span class="n">cast</span><span class="p">(</span><span class="n">Label</span><span class="p">,</span> <span class="n">index</span><span class="p">)]</span>
        <span class="k">elif</span> <span class="n">is_name_like_value</span><span class="p">(</span><span class="n">index</span><span class="p">):</span>
            <span class="n">index_list</span> <span class="o">=</span> <span class="p">[(</span><span class="n">index</span><span class="p">,)]</span>
        <span class="k">elif</span> <span class="nb">all</span><span class="p">(</span><span class="n">is_name_like_value</span><span class="p">(</span><span class="n">idxes</span><span class="p">,</span> <span class="n">allow_tuple</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> <span class="k">for</span> <span class="n">idxes</span> <span class="ow">in</span> <span class="n">index</span><span class="p">):</span>
            <span class="n">index_list</span> <span class="o">=</span> <span class="p">[(</span><span class="n">idex</span><span class="p">,)</span> <span class="k">for</span> <span class="n">idex</span> <span class="ow">in</span> <span class="n">index</span><span class="p">]</span>
        <span class="k">elif</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="n">is_name_like_tuple</span><span class="p">(</span><span class="n">idxes</span><span class="p">)</span> <span class="k">for</span> <span class="n">idxes</span> <span class="ow">in</span> <span class="n">index</span><span class="p">):</span>
            <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
                <span class="s2">&quot;If the given index is a list, it &quot;</span>
                <span class="s2">&quot;should only contains names as all tuples or all non tuples &quot;</span>
                <span class="s2">&quot;that contain index names&quot;</span>
            <span class="p">)</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="n">index_list</span> <span class="o">=</span> <span class="n">cast</span><span class="p">(</span><span class="n">List</span><span class="p">[</span><span class="n">Label</span><span class="p">],</span> <span class="n">index</span><span class="p">)</span>

        <span class="n">drop_index_scols</span> <span class="o">=</span> <span class="p">[]</span>
        <span class="k">for</span> <span class="n">idxes</span> <span class="ow">in</span> <span class="n">index_list</span><span class="p">:</span>
            <span class="k">try</span><span class="p">:</span>
                <span class="c1"># Element i of the label is compared with index level (level + i).</span>
                <span class="n">index_scols</span> <span class="o">=</span> <span class="p">[</span>
                    <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">[</span><span class="n">lvl</span><span class="p">]</span> <span class="o">==</span> <span class="n">idx</span>
                    <span class="k">for</span> <span class="n">lvl</span><span class="p">,</span> <span class="n">idx</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">idxes</span><span class="p">,</span> <span class="n">level</span><span class="p">)</span>
                <span class="p">]</span>
            <span class="k">except</span> <span class="ne">IndexError</span><span class="p">:</span>
                <span class="c1"># Report the key length first and the index depth second, as the message reads.</span>
                <span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span>
                    <span class="s2">&quot;Key length (</span><span class="si">{}</span><span class="s2">) exceeds index depth (</span><span class="si">{}</span><span class="s2">)&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
                        <span class="nb">len</span><span class="p">(</span><span class="n">idxes</span><span class="p">),</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_level</span>
                    <span class="p">)</span>
                <span class="p">)</span>
            <span class="c1"># A row matches a label only when every compared level matches.</span>
            <span class="n">drop_index_scols</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">index_scols</span><span class="p">))</span>

        <span class="k">if</span> <span class="ow">not</span> <span class="n">drop_index_scols</span><span class="p">:</span>
            <span class="c1"># An empty list of labels drops nothing; reduce() below would raise on an empty list.</span>
            <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">)</span>

        <span class="c1"># Keep the rows that match none of the labels.</span>
        <span class="n">cond</span> <span class="o">=</span> <span class="o">~</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">|</span> <span class="n">y</span><span class="p">,</span> <span class="n">drop_index_scols</span><span class="p">)</span>
        <span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="o">.</span><span class="n">with_filter</span><span class="p">(</span><span class="n">cond</span><span class="p">))</span>
    <span class="k">else</span><span class="p">:</span>
        <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Need to specify at least one of &#39;labels&#39; or &#39;index&#39;&quot;</span><span class="p">)</span>
<div class="viewcode-block" id="Series.head"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.head.html#pyspark.pandas.Series.head">[docs]</a> <span class="k">def</span> <span class="nf">head</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
    <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">    Return the first `n` rows.</span>

<span class="sd">    Rows are picked by position, which makes this a quick way to check</span>
<span class="sd">    whether the object holds the kind of data you expect.</span>

<span class="sd">    Parameters</span>
<span class="sd">    ----------</span>
<span class="sd">    n : int, default 5</span>
<span class="sd">        Number of rows to return.</span>

<span class="sd">    Returns</span>
<span class="sd">    -------</span>
<span class="sd">    Series</span>
<span class="sd">        The first `n` rows of the caller object.</span>

<span class="sd">    Examples</span>
<span class="sd">    --------</span>
<span class="sd">    &gt;&gt;&gt; df = ps.DataFrame({&#39;animal&#39;:[&#39;alligator&#39;, &#39;bee&#39;, &#39;falcon&#39;, &#39;lion&#39;]})</span>
<span class="sd">    &gt;&gt;&gt; df.animal.head(2)  # doctest: +NORMALIZE_WHITESPACE</span>
<span class="sd">    0    alligator</span>
<span class="sd">    1          bee</span>
<span class="sd">    Name: animal, dtype: object</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="c1"># Take the rows through the DataFrame implementation, then put the Series name back.</span>
    <span class="n">head_psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
    <span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">head_psdf</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.last"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.last.html#pyspark.pandas.Series.last">[docs]</a> <span class="k">def</span> <span class="nf">last</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">offset</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">DateOffset</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
    <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">    Select the final periods of time series data based on a date offset.</span>

<span class="sd">    For a Series indexed by dates, this picks the trailing elements that fall</span>
<span class="sd">    within the given date offset.</span>

<span class="sd">    Parameters</span>
<span class="sd">    ----------</span>
<span class="sd">    offset : str or DateOffset</span>
<span class="sd">        The offset length of the data that will be selected. For instance,</span>
<span class="sd">        &#39;3D&#39; will display all the rows having their index within the last 3 days.</span>

<span class="sd">    Returns</span>
<span class="sd">    -------</span>
<span class="sd">    Series</span>
<span class="sd">        A subset of the caller.</span>

<span class="sd">    Raises</span>
<span class="sd">    ------</span>
<span class="sd">    TypeError</span>
<span class="sd">        If the index is not a :class:`DatetimeIndex`</span>

<span class="sd">    Examples</span>
<span class="sd">    --------</span>
<span class="sd">    &gt;&gt;&gt; index = pd.date_range(&#39;2018-04-09&#39;, periods=4, freq=&#39;2D&#39;)</span>
<span class="sd">    &gt;&gt;&gt; psser = ps.Series([1, 2, 3, 4], index=index)</span>
<span class="sd">    &gt;&gt;&gt; psser</span>
<span class="sd">    2018-04-09    1</span>
<span class="sd">    2018-04-11    2</span>
<span class="sd">    2018-04-13    3</span>
<span class="sd">    2018-04-15    4</span>
<span class="sd">    dtype: int64</span>

<span class="sd">    Get the rows for the last 3 days:</span>

<span class="sd">    &gt;&gt;&gt; psser.last(&#39;3D&#39;)</span>
<span class="sd">    2018-04-13    3</span>
<span class="sd">    2018-04-15    4</span>
<span class="sd">    dtype: int64</span>

<span class="sd">    Notice that the data for the last 3 calendar days were returned, not the</span>
<span class="sd">    last 3 observed days in the dataset, and therefore data for 2018-04-11 was</span>
<span class="sd">    not returned.</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="c1"># Select through the DataFrame implementation, then put the Series name back.</span>
    <span class="n">trailing_psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">last</span><span class="p">(</span><span class="n">offset</span><span class="p">)</span>
    <span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">trailing_psdf</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.first"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.first.html#pyspark.pandas.Series.first">[docs]</a> <span class="k">def</span> <span class="nf">first</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">offset</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">DateOffset</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
    <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">    Select the first periods of time series data based on a date offset.</span>

<span class="sd">    For a Series indexed by dates, this picks the leading elements that fall</span>
<span class="sd">    within the given date offset.</span>

<span class="sd">    Parameters</span>
<span class="sd">    ----------</span>
<span class="sd">    offset : str or DateOffset</span>
<span class="sd">        The offset length of the data that will be selected. For instance,</span>
<span class="sd">        &#39;3D&#39; will display all the rows having their index within the first 3 days.</span>

<span class="sd">    Returns</span>
<span class="sd">    -------</span>
<span class="sd">    Series</span>
<span class="sd">        A subset of the caller.</span>

<span class="sd">    Raises</span>
<span class="sd">    ------</span>
<span class="sd">    TypeError</span>
<span class="sd">        If the index is not a :class:`DatetimeIndex`</span>

<span class="sd">    Examples</span>
<span class="sd">    --------</span>
<span class="sd">    &gt;&gt;&gt; index = pd.date_range(&#39;2018-04-09&#39;, periods=4, freq=&#39;2D&#39;)</span>
<span class="sd">    &gt;&gt;&gt; psser = ps.Series([1, 2, 3, 4], index=index)</span>
<span class="sd">    &gt;&gt;&gt; psser</span>
<span class="sd">    2018-04-09    1</span>
<span class="sd">    2018-04-11    2</span>
<span class="sd">    2018-04-13    3</span>
<span class="sd">    2018-04-15    4</span>
<span class="sd">    dtype: int64</span>

<span class="sd">    Get the rows for the first 3 days:</span>

<span class="sd">    &gt;&gt;&gt; psser.first(&#39;3D&#39;)</span>
<span class="sd">    2018-04-09    1</span>
<span class="sd">    2018-04-11    2</span>
<span class="sd">    dtype: int64</span>

<span class="sd">    Notice that the data for the first 3 calendar days were returned, not the</span>
<span class="sd">    first 3 observed days in the dataset, and therefore data for 2018-04-13 was</span>
<span class="sd">    not returned.</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="c1"># Select through the DataFrame implementation, then put the Series name back.</span>
    <span class="n">leading_psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">first</span><span class="p">(</span><span class="n">offset</span><span class="p">)</span>
    <span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">leading_psdf</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<span class="c1"># TODO: Categorical type isn&#39;t supported (due to PySpark&#39;s limitation) and</span>
<span class="c1"># some doctests related to timestamps were not added.</span>
<div class="viewcode-block" id="Series.unique"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.unique.html#pyspark.pandas.Series.unique">[docs]</a> <span class="k">def</span> <span class="nf">unique</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
    <span class="sd">&quot;&quot;&quot;</span>
<span class="sd">    Return the unique values of the Series.</span>

<span class="sd">    The values are computed with Spark&#39;s ``distinct()`` and are not sorted;</span>
<span class="sd">    the examples below sort the result before displaying it.</span>

<span class="sd">    .. note:: This method returns newly created Series whereas pandas returns</span>
<span class="sd">              the unique values as a NumPy array.</span>

<span class="sd">    Returns</span>
<span class="sd">    -------</span>
<span class="sd">    Series</span>
<span class="sd">        The unique values, carrying the same name as the caller.</span>

<span class="sd">    See Also</span>
<span class="sd">    --------</span>
<span class="sd">    Index.unique</span>
<span class="sd">    groupby.SeriesGroupBy.unique</span>

<span class="sd">    Examples</span>
<span class="sd">    --------</span>
<span class="sd">    &gt;&gt;&gt; psser = ps.Series([2, 1, 3, 3], name=&#39;A&#39;)</span>
<span class="sd">    &gt;&gt;&gt; psser.unique().sort_values()  # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS</span>
<span class="sd">    &lt;BLANKLINE&gt;</span>
<span class="sd">    ...  1</span>
<span class="sd">    ...  2</span>
<span class="sd">    ...  3</span>
<span class="sd">    Name: A, dtype: int64</span>

<span class="sd">    &gt;&gt;&gt; ps.Series([pd.Timestamp(&#39;2016-01-01&#39;) for _ in range(3)]).unique()</span>
<span class="sd">    0   2016-01-01</span>
<span class="sd">    dtype: datetime64[ns]</span>

<span class="sd">    &gt;&gt;&gt; psser.name = (&#39;x&#39;, &#39;a&#39;)</span>
<span class="sd">    &gt;&gt;&gt; psser.unique().sort_values()  # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS</span>
<span class="sd">    &lt;BLANKLINE&gt;</span>
<span class="sd">    ...  1</span>
<span class="sd">    ...  2</span>
<span class="sd">    ...  3</span>
<span class="sd">    Name: (x, a), dtype: int64</span>
<span class="sd">    &quot;&quot;&quot;</span>
    <span class="n">data_column_name</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
    <span class="c1"># Project the data column alone and de-duplicate it on the Spark side.</span>
    <span class="n">distinct_sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
    <span class="c1"># No index columns are passed; the column label and field metadata are reused from the caller.</span>
    <span class="n">unique_internal</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="p">(</span>
        <span class="n">spark_frame</span><span class="o">=</span><span class="n">distinct_sdf</span><span class="p">,</span>
        <span class="n">index_spark_columns</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
        <span class="n">column_labels</span><span class="o">=</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">],</span>
        <span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">distinct_sdf</span><span class="p">,</span> <span class="n">data_column_name</span><span class="p">)],</span>
        <span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]],</span>
        <span class="n">column_label_names</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_label_names</span><span class="p">,</span>
    <span class="p">)</span>
    <span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">unique_internal</span><span class="p">))</span></div>
<div class="viewcode-block" id="Series.sort_values"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.sort_values.html#pyspark.pandas.Series.sort_values">[docs]</a> <span class="k">def</span> <span class="nf">sort_values</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">ascending</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span> <span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">na_position</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;last&quot;</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sort by the values.</span>
<span class="sd"> Sort a Series in ascending or descending order by some criterion.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> ascending : bool or list of bool, default True</span>
<span class="sd"> Sort ascending vs. descending. Specify list for multiple sort</span>
<span class="sd"> orders. If this is a list of bools, must match the length of</span>
<span class="sd"> the by.</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> if True, perform operation in-place</span>
<span class="sd"> na_position : {&#39;first&#39;, &#39;last&#39;}, default &#39;last&#39;</span>
<span class="sd"> `first` puts NaNs at the beginning, `last` puts NaNs at the end</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> sorted_obj : Series ordered by values.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([np.nan, 1, 3, 10, 5])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> 4 5.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Sort values in ascending order (default behaviour)</span>
<span class="sd"> &gt;&gt;&gt; s.sort_values(ascending=True)</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 4 5.0</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Sort values in descending order</span>
<span class="sd"> &gt;&gt;&gt; s.sort_values(ascending=False)</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> 4 5.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Sort values inplace</span>
<span class="sd"> &gt;&gt;&gt; s.sort_values(ascending=False, inplace=True)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> 4 5.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Sort values putting NAs first</span>
<span class="sd"> &gt;&gt;&gt; s.sort_values(na_position=&#39;first&#39;)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 4 5.0</span>
<span class="sd"> 3 10.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Sort a series of strings</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;z&#39;, &#39;b&#39;, &#39;d&#39;, &#39;a&#39;, &#39;c&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 z</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 d</span>
<span class="sd"> 3 a</span>
<span class="sd"> 4 c</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; s.sort_values()</span>
<span class="sd"> 3 a</span>
<span class="sd"> 1 b</span>
<span class="sd"> 4 c</span>
<span class="sd"> 2 d</span>
<span class="sd"> 0 z</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">inplace</span> <span class="o">=</span> <span class="n">validate_bool_kwarg</span><span class="p">(</span><span class="n">inplace</span><span class="p">,</span> <span class="s2">&quot;inplace&quot;</span><span class="p">)</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]]</span><span class="o">.</span><span class="n">_sort</span><span class="p">(</span>
<span class="n">by</span><span class="o">=</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">],</span> <span class="n">ascending</span><span class="o">=</span><span class="n">ascending</span><span class="p">,</span> <span class="n">na_position</span><span class="o">=</span><span class="n">na_position</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.sort_index"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.sort_index.html#pyspark.pandas.Series.sort_index">[docs]</a> <span class="k">def</span> <span class="nf">sort_index</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="n">level</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">int</span><span class="p">]]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">ascending</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">inplace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">kind</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">na_position</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;last&quot;</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Sort object by labels (along an axis)</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> axis : index, columns to direct sorting. Currently, only axis = 0 is supported.</span>
<span class="sd"> level : int or level name or list of ints or list of level names</span>
<span class="sd"> if not None, sort on values in specified index level(s)</span>
<span class="sd"> ascending : boolean, default True</span>
<span class="sd"> Sort ascending vs. descending</span>
<span class="sd"> inplace : bool, default False</span>
<span class="sd"> if True, perform operation in-place</span>
<span class="sd"> kind : str, default None</span>
<span class="sd"> pandas-on-Spark does not allow specifying the sorting algorithm at the moment,</span>
<span class="sd"> default None</span>
<span class="sd"> na_position : {&#39;first&#39;, &#39;last&#39;}, default &#39;last&#39;</span>
<span class="sd"> first puts NaNs at the beginning, last puts NaNs at the end. Not implemented for</span>
<span class="sd"> MultiIndex.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> sorted_obj : Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; df = ps.Series([2, 1, np.nan], index=[&#39;b&#39;, &#39;a&#39;, np.nan])</span>
<span class="sd"> &gt;&gt;&gt; df.sort_index()</span>
<span class="sd"> a 1.0</span>
<span class="sd"> b 2.0</span>
<span class="sd"> NaN NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; df.sort_index(ascending=False)</span>
<span class="sd"> b 2.0</span>
<span class="sd"> a 1.0</span>
<span class="sd"> NaN NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; df.sort_index(na_position=&#39;first&#39;)</span>
<span class="sd"> NaN NaN</span>
<span class="sd"> a 1.0</span>
<span class="sd"> b 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; df.sort_index(inplace=True)</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> a 1.0</span>
<span class="sd"> b 2.0</span>
<span class="sd"> NaN NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; df = ps.Series(range(4), index=[[&#39;b&#39;, &#39;b&#39;, &#39;a&#39;, &#39;a&#39;], [1, 0, 1, 0]], name=&#39;0&#39;)</span>
<span class="sd"> &gt;&gt;&gt; df.sort_index()</span>
<span class="sd"> a 0 3</span>
<span class="sd"> 1 2</span>
<span class="sd"> b 0 1</span>
<span class="sd"> 1 0</span>
<span class="sd"> Name: 0, dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; df.sort_index(level=1) # doctest: +SKIP</span>
<span class="sd"> a 0 3</span>
<span class="sd"> b 0 1</span>
<span class="sd"> a 1 2</span>
<span class="sd"> b 1 0</span>
<span class="sd"> Name: 0, dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; df.sort_index(level=[1, 0])</span>
<span class="sd"> a 0 3</span>
<span class="sd"> b 0 1</span>
<span class="sd"> a 1 2</span>
<span class="sd"> b 1 0</span>
<span class="sd"> Name: 0, dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">inplace</span> <span class="o">=</span> <span class="n">validate_bool_kwarg</span><span class="p">(</span><span class="n">inplace</span><span class="p">,</span> <span class="s2">&quot;inplace&quot;</span><span class="p">)</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]]</span><span class="o">.</span><span class="n">sort_index</span><span class="p">(</span>
<span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="n">level</span><span class="p">,</span> <span class="n">ascending</span><span class="o">=</span><span class="n">ascending</span><span class="p">,</span> <span class="n">kind</span><span class="o">=</span><span class="n">kind</span><span class="p">,</span> <span class="n">na_position</span><span class="o">=</span><span class="n">na_position</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">inplace</span><span class="p">:</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.swaplevel"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.swaplevel.html#pyspark.pandas.Series.swaplevel">[docs]</a> <span class="k">def</span> <span class="nf">swaplevel</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">i</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Name</span><span class="p">]</span> <span class="o">=</span> <span class="o">-</span><span class="mi">2</span><span class="p">,</span> <span class="n">j</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Name</span><span class="p">]</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">copy</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Swap levels i and j in a MultiIndex.</span>
<span class="sd"> Default is to swap the two innermost levels of the index.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> i, j : int, str</span>
<span class="sd"> Level of the indices to be swapped. Can pass level name as string.</span>
<span class="sd"> copy : bool, default True</span>
<span class="sd"> Whether to copy underlying data. Must be True.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with levels swapped in MultiIndex.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; midx = pd.MultiIndex.from_arrays([[&#39;a&#39;, &#39;b&#39;], [1, 2]], names = [&#39;word&#39;, &#39;number&#39;])</span>
<span class="sd"> &gt;&gt;&gt; midx # doctest: +SKIP</span>
<span class="sd"> MultiIndex([(&#39;a&#39;, 1),</span>
<span class="sd"> (&#39;b&#39;, 2)],</span>
<span class="sd"> names=[&#39;word&#39;, &#39;number&#39;])</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([&#39;x&#39;, &#39;y&#39;], index=midx)</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> word number</span>
<span class="sd"> a 1 x</span>
<span class="sd"> b 2 y</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; psser.swaplevel()</span>
<span class="sd"> number word</span>
<span class="sd"> 1 a x</span>
<span class="sd"> 2 b y</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; psser.swaplevel(0, 1)</span>
<span class="sd"> number word</span>
<span class="sd"> 1 a x</span>
<span class="sd"> 2 b y</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; psser.swaplevel(&#39;number&#39;, &#39;word&#39;)</span>
<span class="sd"> number word</span>
<span class="sd"> 1 a x</span>
<span class="sd"> 2 b y</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">copy</span> <span class="ow">is</span> <span class="kc">True</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">swaplevel</span><span class="p">(</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.swapaxes"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.swapaxes.html#pyspark.pandas.Series.swapaxes">[docs]</a> <span class="k">def</span> <span class="nf">swapaxes</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">i</span><span class="p">:</span> <span class="n">Axis</span><span class="p">,</span> <span class="n">j</span><span class="p">:</span> <span class="n">Axis</span><span class="p">,</span> <span class="n">copy</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Interchange axes and swap values axes appropriately.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> i: {0 or &#39;index&#39;, 1 or &#39;columns&#39;}. The axis to swap.</span>
<span class="sd"> j: {0 or &#39;index&#39;, 1 or &#39;columns&#39;}. The axis to swap.</span>
<span class="sd"> copy : bool, default True.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([1, 2, 3], index=[&quot;x&quot;, &quot;y&quot;, &quot;z&quot;])</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> x 1</span>
<span class="sd"> y 2</span>
<span class="sd"> z 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt;</span>
<span class="sd"> &gt;&gt;&gt; psser.swapaxes(0, 0)</span>
<span class="sd"> x 1</span>
<span class="sd"> y 2</span>
<span class="sd"> z 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">copy</span> <span class="ow">is</span> <span class="kc">True</span>
<span class="n">i</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">i</span><span class="p">)</span>
<span class="n">j</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">j</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">i</span> <span class="o">==</span> <span class="n">j</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Axis must be 0 for Series&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span></div>
<div class="viewcode-block" id="Series.add_prefix"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.add_prefix.html#pyspark.pandas.Series.add_prefix">[docs]</a> <span class="k">def</span> <span class="nf">add_prefix</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">prefix</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Prefix labels with string `prefix`.</span>
<span class="sd"> For Series, the row labels are prefixed.</span>
<span class="sd"> For DataFrame, the column labels are prefixed.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> prefix : str</span>
<span class="sd"> The string to add before each label.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> New Series with updated labels.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.add_suffix: Suffix row labels with string `suffix`.</span>
<span class="sd"> DataFrame.add_suffix: Suffix column labels with string `suffix`.</span>
<span class="sd"> DataFrame.add_prefix: Prefix column labels with string `prefix`.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.add_prefix(&#39;item_&#39;)</span>
<span class="sd"> item_0 1</span>
<span class="sd"> item_1 2</span>
<span class="sd"> item_2 3</span>
<span class="sd"> item_3 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">prefix</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="p">[</span>
<span class="n">F</span><span class="o">.</span><span class="n">concat</span><span class="p">(</span><span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">prefix</span><span class="p">),</span> <span class="n">index_spark_column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">index_spark_column_name</span><span class="p">)</span>
<span class="k">for</span> <span class="n">index_spark_column</span><span class="p">,</span> <span class="n">index_spark_column_name</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span>
<span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">,</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_column_names</span>
<span class="p">)</span>
<span class="p">]</span>
<span class="o">+</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_spark_columns</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span>
<span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="o">.</span><span class="n">with_new_sdf</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">index_fields</span><span class="o">=</span><span class="p">([</span><span class="kc">None</span><span class="p">]</span> <span class="o">*</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_level</span><span class="p">)))</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="Series.add_suffix"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.add_suffix.html#pyspark.pandas.Series.add_suffix">[docs]</a> <span class="k">def</span> <span class="nf">add_suffix</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">suffix</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Suffix labels with string `suffix`.</span>
<span class="sd"> For Series, the row labels are suffixed.</span>
<span class="sd"> For DataFrame, the column labels are suffixed.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> suffix : str</span>
<span class="sd"> The string to add after each label.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> New Series with updated labels.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.add_prefix: Prefix row labels with string `prefix`.</span>
<span class="sd"> DataFrame.add_prefix: Prefix column labels with string `prefix`.</span>
<span class="sd"> DataFrame.add_suffix: Suffix column labels with string `suffix`.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.add_suffix(&#39;_item&#39;)</span>
<span class="sd"> 0_item 1</span>
<span class="sd"> 1_item 2</span>
<span class="sd"> 2_item 3</span>
<span class="sd"> 3_item 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">suffix</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="p">[</span>
<span class="n">F</span><span class="o">.</span><span class="n">concat</span><span class="p">(</span><span class="n">index_spark_column</span><span class="p">,</span> <span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">suffix</span><span class="p">))</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">index_spark_column_name</span><span class="p">)</span>
<span class="k">for</span> <span class="n">index_spark_column</span><span class="p">,</span> <span class="n">index_spark_column_name</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span>
<span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">,</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_column_names</span>
<span class="p">)</span>
<span class="p">]</span>
<span class="o">+</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_spark_columns</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span>
<span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="o">.</span><span class="n">with_new_sdf</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">index_fields</span><span class="o">=</span><span class="p">([</span><span class="kc">None</span><span class="p">]</span> <span class="o">*</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_level</span><span class="p">)))</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="Series.corr"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.corr.html#pyspark.pandas.Series.corr">[docs]</a> <span class="k">def</span> <span class="nf">corr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">method</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;pearson&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compute correlation with `other` Series, excluding missing values.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series</span>
<span class="sd"> method : {&#39;pearson&#39;, &#39;spearman&#39;}</span>
<span class="sd"> * pearson : standard correlation coefficient</span>
<span class="sd"> * spearman : Spearman rank correlation</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> correlation : float</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;s1&#39;: [.2, .0, .6, .2],</span>
<span class="sd"> ... &#39;s2&#39;: [.3, .6, .0, .1]})</span>
<span class="sd"> &gt;&gt;&gt; s1 = df.s1</span>
<span class="sd"> &gt;&gt;&gt; s2 = df.s2</span>
<span class="sd"> &gt;&gt;&gt; s1.corr(s2, method=&#39;pearson&#39;) # doctest: +ELLIPSIS</span>
<span class="sd"> -0.851064...</span>
<span class="sd"> &gt;&gt;&gt; s1.corr(s2, method=&#39;spearman&#39;) # doctest: +ELLIPSIS</span>
<span class="sd"> -0.948683...</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> There are behavior differences between pandas-on-Spark and pandas.</span>
<span class="sd"> * the `method` argument only accepts &#39;pearson&#39;, &#39;spearman&#39;</span>
<span class="sd"> * the data should not contain NaNs. pandas-on-Spark will return an error.</span>
<span class="sd"> * pandas-on-Spark doesn&#39;t support the following argument(s).</span>
<span class="sd"> * `min_periods` argument is not supported</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># This implementation is suboptimal because it computes more than necessary,</span>
<span class="c1"># but it should be a start</span>
<span class="n">columns</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;__corr_arg1__&quot;</span><span class="p">,</span> <span class="s2">&quot;__corr_arg2__&quot;</span><span class="p">]</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">assign</span><span class="p">(</span><span class="n">__corr_arg1__</span><span class="o">=</span><span class="bp">self</span><span class="p">,</span> <span class="n">__corr_arg2__</span><span class="o">=</span><span class="n">other</span><span class="p">)[</span><span class="n">columns</span><span class="p">]</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="n">columns</span>
<span class="n">c</span> <span class="o">=</span> <span class="n">corr</span><span class="p">(</span><span class="n">psdf</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="n">method</span><span class="p">)</span>
<span class="k">return</span> <span class="n">c</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="nb">tuple</span><span class="p">(</span><span class="n">columns</span><span class="p">)]</span></div>
<div class="viewcode-block" id="Series.nsmallest"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.nsmallest.html#pyspark.pandas.Series.nsmallest">[docs]</a> <span class="k">def</span> <span class="nf">nsmallest</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the smallest `n` elements.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> n : int, default 5</span>
<span class="sd"> Return this many ascending sorted values.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> The `n` smallest values in the Series, sorted in increasing order.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.nlargest: Get the `n` largest elements.</span>
<span class="sd"> Series.sort_values: Sort Series by values.</span>
<span class="sd"> Series.head: Return the first `n` rows.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Faster than ``.sort_values().head(n)`` for small `n` relative to</span>
<span class="sd"> the size of the ``Series`` object.</span>
<span class="sd"> In pandas-on-Spark, thanks to Spark&#39;s lazy execution and query optimizer,</span>
<span class="sd"> the two would have the same performance.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; data = [1, 2, 3, 4, np.nan ,6, 7, 8]</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 4 NaN</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> 6 7.0</span>
<span class="sd"> 7 8.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> The `n` smallest elements where ``n=5`` by default.</span>
<span class="sd"> &gt;&gt;&gt; s.nsmallest()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.nsmallest(3)</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">sort_values</span><span class="p">(</span><span class="n">ascending</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="n">n</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.nlargest"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.nlargest.html#pyspark.pandas.Series.nlargest">[docs]</a> <span class="k">def</span> <span class="nf">nlargest</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the largest `n` elements.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> n : int, default 5</span>
<span class="sd"> Return this many descending sorted values.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> The `n` largest values in the Series, sorted in decreasing order.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.nsmallest: Get the `n` smallest elements.</span>
<span class="sd"> Series.sort_values: Sort Series by values.</span>
<span class="sd"> Series.head: Return the first `n` rows.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Faster than ``.sort_values(ascending=False).head(n)`` for small `n`</span>
<span class="sd"> relative to the size of the ``Series`` object.</span>
<span class="sd"> In pandas-on-Spark, thanks to Spark&#39;s lazy execution and query optimizer,</span>
<span class="sd"> the two would have the same performance.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; data = [1, 2, 3, 4, np.nan ,6, 7, 8]</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 4 NaN</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> 6 7.0</span>
<span class="sd"> 7 8.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> The `n` largest elements where ``n=5`` by default.</span>
<span class="sd"> &gt;&gt;&gt; s.nlargest()</span>
<span class="sd"> 7 8.0</span>
<span class="sd"> 6 7.0</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.nlargest(n=3)</span>
<span class="sd"> 7 8.0</span>
<span class="sd"> 6 7.0</span>
<span class="sd"> 5 6.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">sort_values</span><span class="p">(</span><span class="n">ascending</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="n">n</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.append"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.append.html#pyspark.pandas.Series.append">[docs]</a> <span class="k">def</span> <span class="nf">append</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">to_append</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">ignore_index</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">verify_integrity</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Concatenate two or more Series.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> to_append : Series</span>
<span class="sd"> ignore_index : boolean, default False</span>
<span class="sd"> If True, do not use the index labels.</span>
<span class="sd"> verify_integrity : boolean, default False</span>
<span class="sd"> If True, raise Exception on creating index with duplicates</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> appended : Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([4, 5, 6])</span>
<span class="sd"> &gt;&gt;&gt; s3 = ps.Series([4, 5, 6], index=[3,4,5])</span>
<span class="sd"> &gt;&gt;&gt; s1.append(s2)</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 5</span>
<span class="sd"> 2 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s1.append(s3)</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> 4 5</span>
<span class="sd"> 5 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> With ignore_index set to True:</span>
<span class="sd"> &gt;&gt;&gt; s1.append(s2, ignore_index=True)</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> 4 5</span>
<span class="sd"> 5 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">to_append</span><span class="o">.</span><span class="n">to_frame</span><span class="p">(),</span> <span class="n">ignore_index</span><span class="p">,</span> <span class="n">verify_integrity</span><span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.sample"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.sample.html#pyspark.pandas.Series.sample">[docs]</a> <span class="k">def</span> <span class="nf">sample</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">n</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">frac</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">replace</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">random_state</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">n</span><span class="o">=</span><span class="n">n</span><span class="p">,</span> <span class="n">frac</span><span class="o">=</span><span class="n">frac</span><span class="p">,</span> <span class="n">replace</span><span class="o">=</span><span class="n">replace</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="n">random_state</span><span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<span class="n">sample</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="o">.</span><span class="n">sample</span><span class="o">.</span><span class="vm">__doc__</span>
<div class="viewcode-block" id="Series.hist"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.hist.html#pyspark.pandas.Series.hist">[docs]</a> <span class="nd">@no_type_check</span>
<span class="k">def</span> <span class="nf">hist</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">bins</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span> <span class="o">**</span><span class="n">kwds</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">plot</span><span class="o">.</span><span class="n">hist</span><span class="p">(</span><span class="n">bins</span><span class="p">,</span> <span class="o">**</span><span class="n">kwds</span><span class="p">)</span></div>
<span class="n">hist</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">PandasOnSparkPlotAccessor</span><span class="o">.</span><span class="n">hist</span><span class="o">.</span><span class="vm">__doc__</span>
<div class="viewcode-block" id="Series.apply"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.apply.html#pyspark.pandas.Series.apply">[docs]</a> <span class="k">def</span> <span class="nf">apply</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">,</span> <span class="n">args</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="n">Any</span><span class="p">]</span> <span class="o">=</span> <span class="p">(),</span> <span class="o">**</span><span class="n">kwds</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Invoke function on values of Series.</span>
<span class="sd"> Can be a Python function that only works on the Series.</span>
<span class="sd"> .. note:: this API executes the function once to infer the type which is</span>
<span class="sd"> potentially expensive, for instance, when the dataset is created after</span>
<span class="sd"> aggregations or sorting.</span>
<span class="sd"> To avoid this, specify return type in ``func``, for instance, as below:</span>
<span class="sd"> &gt;&gt;&gt; def square(x) -&gt; np.int32:</span>
<span class="sd"> ... return x ** 2</span>
<span class="sd"> pandas-on-Spark uses return type hint and does not try to infer the type.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> func : function</span>
<span class="sd"> Python function to apply. The return type hint is optional; when it is omitted, the type is inferred.</span>
<span class="sd"> args : tuple</span>
<span class="sd"> Positional arguments passed to func after the series value.</span>
<span class="sd"> **kwds</span>
<span class="sd"> Additional keyword arguments passed to func.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.aggregate : Only perform aggregating type operations.</span>
<span class="sd"> Series.transform : Only perform transforming type operations.</span>
<span class="sd"> DataFrame.apply : The equivalent function for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Create a Series with typical summer temperatures for each city.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([20, 21, 12],</span>
<span class="sd"> ... index=[&#39;London&#39;, &#39;New York&#39;, &#39;Helsinki&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> London 20</span>
<span class="sd"> New York 21</span>
<span class="sd"> Helsinki 12</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Square the values by defining a function and passing it as an</span>
<span class="sd"> argument to ``apply()``.</span>
<span class="sd"> &gt;&gt;&gt; def square(x) -&gt; np.int64:</span>
<span class="sd"> ... return x ** 2</span>
<span class="sd"> &gt;&gt;&gt; s.apply(square)</span>
<span class="sd"> London 400</span>
<span class="sd"> New York 441</span>
<span class="sd"> Helsinki 144</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Define a custom function that needs additional positional</span>
<span class="sd"> arguments and pass these additional arguments using the</span>
<span class="sd"> ``args`` keyword</span>
<span class="sd"> &gt;&gt;&gt; def subtract_custom_value(x, custom_value) -&gt; np.int64:</span>
<span class="sd"> ... return x - custom_value</span>
<span class="sd"> &gt;&gt;&gt; s.apply(subtract_custom_value, args=(5,))</span>
<span class="sd"> London 15</span>
<span class="sd"> New York 16</span>
<span class="sd"> Helsinki 7</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Define a custom function that takes keyword arguments</span>
<span class="sd"> and pass these arguments to ``apply``</span>
<span class="sd"> &gt;&gt;&gt; def add_custom_values(x, **kwargs) -&gt; np.int64:</span>
<span class="sd"> ... for month in kwargs:</span>
<span class="sd"> ... x += kwargs[month]</span>
<span class="sd"> ... return x</span>
<span class="sd"> &gt;&gt;&gt; s.apply(add_custom_values, june=30, july=20, august=25)</span>
<span class="sd"> London 95</span>
<span class="sd"> New York 96</span>
<span class="sd"> Helsinki 87</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Use a function from the Numpy library</span>
<span class="sd"> &gt;&gt;&gt; def numpy_log(col) -&gt; np.float64:</span>
<span class="sd"> ... return np.log(col)</span>
<span class="sd"> &gt;&gt;&gt; s.apply(numpy_log)</span>
<span class="sd"> London 2.995732</span>
<span class="sd"> New York 3.044522</span>
<span class="sd"> Helsinki 2.484907</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> You can omit the type hint and let pandas-on-Spark infer its type.</span>
<span class="sd"> &gt;&gt;&gt; s.apply(np.log)</span>
<span class="sd"> London 2.995732</span>
<span class="sd"> New York 3.044522</span>
<span class="sd"> Helsinki 2.484907</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="n">callable</span><span class="p">(</span><span class="n">func</span><span class="p">),</span> <span class="s2">&quot;the first argument should be a callable function.&quot;</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">spec</span> <span class="o">=</span> <span class="n">inspect</span><span class="o">.</span><span class="n">getfullargspec</span><span class="p">(</span><span class="n">func</span><span class="p">)</span>
<span class="n">return_sig</span> <span class="o">=</span> <span class="n">spec</span><span class="o">.</span><span class="n">annotations</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;return&quot;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="n">should_infer_schema</span> <span class="o">=</span> <span class="n">return_sig</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="k">except</span> <span class="ne">TypeError</span><span class="p">:</span>
<span class="c1"># Falls back to schema inference if it fails to get signature.</span>
<span class="n">should_infer_schema</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">def</span> <span class="nf">apply_each</span><span class="p">(</span><span class="n">s</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">:</span>
<span class="k">return</span> <span class="n">s</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwds</span><span class="p">)</span>
<span class="k">if</span> <span class="n">should_infer_schema</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">pandas_on_spark</span><span class="o">.</span><span class="n">_transform_batch</span><span class="p">(</span><span class="n">apply_each</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">sig_return</span> <span class="o">=</span> <span class="n">infer_return_type</span><span class="p">(</span><span class="n">func</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">sig_return</span><span class="p">,</span> <span class="n">ScalarType</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;Expected the return type of this function to be of scalar type, &quot;</span>
<span class="s2">&quot;but found type </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">sig_return</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">return_type</span> <span class="o">=</span> <span class="n">sig_return</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">pandas_on_spark</span><span class="o">.</span><span class="n">_transform_batch</span><span class="p">(</span><span class="n">apply_each</span><span class="p">,</span> <span class="n">return_type</span><span class="p">)</span></div>
<span class="c1"># TODO: not all arguments are implemented compared to pandas&#39; for now.</span>
<div class="viewcode-block" id="Series.aggregate"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.aggregate.html#pyspark.pandas.Series.aggregate">[docs]</a> <span class="k">def</span> <span class="nf">aggregate</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">]])</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;Aggregate using one or more operations over the specified axis.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> func : str or a list of str</span>
<span class="sd"> Function name(s), as a string or a list of strings, to apply to the Series.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> scalar, Series</span>
<span class="sd"> The return can be:</span>
<span class="sd"> - scalar : when Series.agg is called with single function</span>
<span class="sd"> - Series : when Series.agg is called with several functions</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> `agg` is an alias for `aggregate`. Use the alias.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.apply : Invoke function on a Series.</span>
<span class="sd"> Series.transform : Only perform transforming type operations.</span>
<span class="sd"> Series.groupby : Perform operations over groups.</span>
<span class="sd"> DataFrame.aggregate : The equivalent function for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s.agg(&#39;min&#39;)</span>
<span class="sd"> 1</span>
<span class="sd"> &gt;&gt;&gt; s.agg([&#39;min&#39;, &#39;max&#39;]).sort_index()</span>
<span class="sd"> max 4</span>
<span class="sd"> min 1</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span><span class="n">func</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">return</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">)()</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;func must be a string or list of strings&quot;</span><span class="p">)</span></div>
<span class="n">agg</span> <span class="o">=</span> <span class="n">aggregate</span>
<span class="k">def</span> <span class="nf">transpose</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the transpose, which is by definition self.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> It returns the same object as the transpose of the given series object, which is by</span>
<span class="sd"> definition self.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.transpose()</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">T</span> <span class="o">=</span> <span class="nb">property</span><span class="p">(</span><span class="n">transpose</span><span class="p">)</span>
<div class="viewcode-block" id="Series.transform"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.transform.html#pyspark.pandas.Series.transform">[docs]</a> <span class="k">def</span> <span class="nf">transform</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">func</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Callable</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Callable</span><span class="p">]],</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span> <span class="o">*</span><span class="n">args</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Call ``func`` producing the same type as `self` with transformed values</span>
<span class="sd"> and that has the same axis length as input.</span>
<span class="sd"> .. note:: this API executes the function once to infer the type which is</span>
<span class="sd"> potentially expensive, for instance, when the dataset is created after</span>
<span class="sd"> aggregations or sorting.</span>
<span class="sd"> To avoid this, specify return type in ``func``, for instance, as below:</span>
<span class="sd"> &gt;&gt;&gt; def square(x) -&gt; np.int32:</span>
<span class="sd"> ... return x ** 2</span>
<span class="sd"> pandas-on-Spark uses return type hint and does not try to infer the type.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> func : function or list</span>
<span class="sd"> A function or a list of functions to use for transforming the data.</span>
<span class="sd"> axis : int, default 0 or &#39;index&#39;</span>
<span class="sd"> Can only be set to 0 at the moment.</span>
<span class="sd"> *args</span>
<span class="sd"> Positional arguments to pass to `func`.</span>
<span class="sd"> **kwargs</span>
<span class="sd"> Keyword arguments to pass to `func`.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> An instance of the same type as `self` that must have the same length as input.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.aggregate : Only perform aggregating type operations.</span>
<span class="sd"> Series.apply : Invoke function on Series.</span>
<span class="sd"> DataFrame.transform : The equivalent function for DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(range(3))</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; def sqrt(x) -&gt; float:</span>
<span class="sd"> ... return np.sqrt(x)</span>
<span class="sd"> &gt;&gt;&gt; s.transform(sqrt)</span>
<span class="sd"> 0 0.000000</span>
<span class="sd"> 1 1.000000</span>
<span class="sd"> 2 1.414214</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Even though the resulting instance must have the same length as the</span>
<span class="sd"> input, it is possible to provide several input functions:</span>
<span class="sd"> &gt;&gt;&gt; def exp(x) -&gt; float:</span>
<span class="sd"> ... return np.exp(x)</span>
<span class="sd"> &gt;&gt;&gt; s.transform([sqrt, exp])</span>
<span class="sd"> sqrt exp</span>
<span class="sd"> 0 0.000000 1.000000</span>
<span class="sd"> 1 1.000000 2.718282</span>
<span class="sd"> 2 1.414214 7.389056</span>
<span class="sd"> You can omit the type hint and let pandas-on-Spark infer its type.</span>
<span class="sd"> &gt;&gt;&gt; s.transform([np.sqrt, np.exp])</span>
<span class="sd"> sqrt exp</span>
<span class="sd"> 0 0.000000 1.000000</span>
<span class="sd"> 1 1.000000 2.718282</span>
<span class="sd"> 2 1.414214 7.389056</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span>
<span class="k">if</span> <span class="n">axis</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s1">&#39;axis should be either 0 or &quot;index&quot; currently.&#39;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
<span class="n">applied</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="n">func</span><span class="p">:</span>
<span class="n">applied</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="n">f</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="vm">__name__</span><span class="p">))</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_columns</span><span class="p">(</span><span class="n">applied</span><span class="p">)</span>
<span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="n">args</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.round"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.round.html#pyspark.pandas.Series.round">[docs]</a> <span class="k">def</span> <span class="nf">round</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">decimals</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Round each value in a Series to the given number of decimals.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> decimals : int</span>
<span class="sd"> Number of decimal places to round to (default: 0).</span>
<span class="sd"> If decimals is negative, it specifies the number of</span>
<span class="sd"> positions to the left of the decimal point.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series object</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> DataFrame.round</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; df = ps.Series([0.028208, 0.038683, 0.877076], name=&#39;x&#39;)</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> 0 0.028208</span>
<span class="sd"> 1 0.038683</span>
<span class="sd"> 2 0.877076</span>
<span class="sd"> Name: x, dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; df.round(2)</span>
<span class="sd"> 0 0.03</span>
<span class="sd"> 1 0.04</span>
<span class="sd"> 2 0.88</span>
<span class="sd"> Name: x, dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">decimals</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;decimals must be an integer&quot;</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="n">decimals</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span>
<span class="n">scol</span><span class="p">,</span>
<span class="n">field</span><span class="o">=</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">nullable</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">DecimalType</span><span class="p">)</span>
<span class="k">else</span> <span class="kc">None</span>
<span class="p">),</span>
<span class="p">)</span></div>
<span class="c1"># TODO: add &#39;interpolation&#39; parameter.</span>
<div class="viewcode-block" id="Series.quantile"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.quantile.html#pyspark.pandas.Series.quantile">[docs]</a> <span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">q</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">float</span><span class="p">,</span> <span class="n">Iterable</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="mf">0.5</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">10000</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return value at the given quantile.</span>
<span class="sd"> .. note:: Unlike pandas, the quantile in pandas-on-Spark is an approximate quantile</span>
<span class="sd"> based upon approximate percentile computation because computing an exact quantile</span>
<span class="sd"> across a large dataset is extremely expensive.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> q : float or array-like, default 0.5 (50% quantile)</span>
<span class="sd"> 0 &lt;= q &lt;= 1, the quantile(s) to compute.</span>
<span class="sd"> accuracy : int, optional</span>
<span class="sd"> Default accuracy of approximation. Larger value means better accuracy.</span>
<span class="sd"> The relative error can be deduced by 1.0 / accuracy.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> float or Series</span>
<span class="sd"> If the current object is a Series and ``q`` is an array, a Series will be</span>
<span class="sd"> returned where the index is ``q`` and the values are the quantiles, otherwise</span>
<span class="sd"> a float will be returned.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4, 5])</span>
<span class="sd"> &gt;&gt;&gt; s.quantile(.5)</span>
<span class="sd"> 3.0</span>
<span class="sd"> &gt;&gt;&gt; (s + 1).quantile(.5)</span>
<span class="sd"> 4.0</span>
<span class="sd"> &gt;&gt;&gt; s.quantile([.25, .5, .75])</span>
<span class="sd"> 0.25 2.0</span>
<span class="sd"> 0.50 3.0</span>
<span class="sd"> 0.75 4.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; (s + 1).quantile([.25, .5, .75])</span>
<span class="sd"> 0.25 3.0</span>
<span class="sd"> 0.50 4.0</span>
<span class="sd"> 0.75 5.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="n">Iterable</span><span class="p">):</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span>
<span class="n">cast</span><span class="p">(</span>
<span class="s2">&quot;ps.DataFrame&quot;</span><span class="p">,</span>
<span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="n">q</span><span class="o">=</span><span class="n">q</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">numeric_only</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">accuracy</span><span class="o">=</span><span class="n">accuracy</span><span class="p">),</span>
<span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">accuracy</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;accuracy must be an integer; however, got [</span><span class="si">%s</span><span class="s2">]&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">accuracy</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span>
<span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">q</span><span class="p">,</span> <span class="nb">float</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;q must be a float or an array of floats; however, [</span><span class="si">%s</span><span class="s2">] found.&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">q</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">q_float</span> <span class="o">=</span> <span class="n">q</span>
<span class="k">if</span> <span class="n">q_float</span> <span class="o">&lt;</span> <span class="mf">0.0</span> <span class="ow">or</span> <span class="n">q_float</span> <span class="o">&gt;</span> <span class="mf">1.0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;percentiles should all be in the interval [0, 1].&quot;</span><span class="p">)</span>
<span class="k">def</span> <span class="nf">quantile</span><span class="p">(</span><span class="n">psser</span><span class="p">:</span> <span class="n">Series</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Column</span><span class="p">:</span>
<span class="n">spark_type</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span>
<span class="n">spark_column</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">spark_type</span><span class="p">,</span> <span class="p">(</span><span class="n">BooleanType</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">)):</span>
<span class="k">return</span> <span class="n">F</span><span class="o">.</span><span class="n">percentile_approx</span><span class="p">(</span><span class="n">spark_column</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">DoubleType</span><span class="p">()),</span> <span class="n">q_float</span><span class="p">,</span> <span class="n">accuracy</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">spark_type</span><span class="p">),</span> <span class="n">spark_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">()</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_reduce_for_stat_function</span><span class="p">(</span><span class="n">quantile</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;quantile&quot;</span><span class="p">)</span></div>
<span class="c1"># TODO: add axis, numeric_only, pct, na_option parameter</span>
<div class="viewcode-block" id="Series.rank"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.rank.html#pyspark.pandas.Series.rank">[docs]</a> <span class="k">def</span> <span class="nf">rank</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">method</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;average&quot;</span><span class="p">,</span> <span class="n">ascending</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compute numerical data ranks (1 through n) along axis. Equal values are</span>
<span class="sd"> assigned a rank that is the average of the ranks of those values.</span>
<span class="sd"> .. note:: the current implementation of rank uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This moves all data into</span>
<span class="sd"> a single partition on a single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method on very large datasets.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> method : {&#39;average&#39;, &#39;min&#39;, &#39;max&#39;, &#39;first&#39;, &#39;dense&#39;}</span>
<span class="sd"> * average: average rank of group</span>
<span class="sd"> * min: lowest rank in group</span>
<span class="sd"> * max: highest rank in group</span>
<span class="sd"> * first: ranks assigned in order they appear in the array</span>
<span class="sd"> * dense: like &#39;min&#39;, but rank always increases by 1 between groups</span>
<span class="sd"> ascending : boolean, default True</span>
<span class="sd"> False for ranks by high (1) to low (N)</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> ranks : same type as caller</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 2, 3], name=&#39;A&#39;)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> Name: A, dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.rank()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.5</span>
<span class="sd"> 2 2.5</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> Name: A, dtype: float64</span>
<span class="sd"> If method is set to &#39;min&#39;, it uses the lowest rank in the group.</span>
<span class="sd"> &gt;&gt;&gt; s.rank(method=&#39;min&#39;)</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> Name: A, dtype: float64</span>
<span class="sd"> If method is set to &#39;max&#39;, it uses the highest rank in the group.</span>
<span class="sd"> &gt;&gt;&gt; s.rank(method=&#39;max&#39;)</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 3.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> Name: A, dtype: float64</span>
<span class="sd"> If method is set to &#39;first&#39;, ranks are assigned in the order the values appear, without grouping ties.</span>
<span class="sd"> &gt;&gt;&gt; s.rank(method=&#39;first&#39;)</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 3 4.0</span>
<span class="sd"> Name: A, dtype: float64</span>
<span class="sd"> If method is set to &#39;dense&#39;, it leaves no gaps between groups.</span>
<span class="sd"> &gt;&gt;&gt; s.rank(method=&#39;dense&#39;)</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 3.0</span>
<span class="sd"> Name: A, dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_rank</span><span class="p">(</span><span class="n">method</span><span class="p">,</span> <span class="n">ascending</span><span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">analyzed</span></div>
<span class="k">def</span> <span class="nf">_rank</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">method</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;average&quot;</span><span class="p">,</span>
<span class="n">ascending</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">part_cols</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">(),</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="n">method</span> <span class="ow">not</span> <span class="ow">in</span> <span class="p">[</span><span class="s2">&quot;average&quot;</span><span class="p">,</span> <span class="s2">&quot;min&quot;</span><span class="p">,</span> <span class="s2">&quot;max&quot;</span><span class="p">,</span> <span class="s2">&quot;first&quot;</span><span class="p">,</span> <span class="s2">&quot;dense&quot;</span><span class="p">]:</span>
<span class="n">msg</span> <span class="o">=</span> <span class="s2">&quot;method must be one of &#39;average&#39;, &#39;min&#39;, &#39;max&#39;, &#39;first&#39;, &#39;dense&#39;&quot;</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="n">msg</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_level</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;rank do not support MultiIndex now&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">ascending</span><span class="p">:</span>
<span class="n">asc_func</span> <span class="o">=</span> <span class="n">Column</span><span class="o">.</span><span class="n">asc</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">asc_func</span> <span class="o">=</span> <span class="n">Column</span><span class="o">.</span><span class="n">desc</span>
<span class="k">if</span> <span class="n">method</span> <span class="o">==</span> <span class="s2">&quot;first&quot;</span><span class="p">:</span>
<span class="n">window</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span>
<span class="n">asc_func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">),</span>
<span class="n">asc_func</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)),</span>
<span class="p">)</span>
<span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s2">&quot;dense&quot;</span><span class="p">:</span>
<span class="n">window</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">asc_func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">))</span>
<span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">dense_rank</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">method</span> <span class="o">==</span> <span class="s2">&quot;average&quot;</span><span class="p">:</span>
<span class="n">stat_func</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">mean</span>
<span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s2">&quot;min&quot;</span><span class="p">:</span>
<span class="n">stat_func</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">min</span>
<span class="k">elif</span> <span class="n">method</span> <span class="o">==</span> <span class="s2">&quot;max&quot;</span><span class="p">:</span>
<span class="n">stat_func</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">max</span>
<span class="n">window1</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">asc_func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">))</span>
<span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">window2</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span>
<span class="n">cast</span><span class="p">(</span><span class="s2">&quot;List[ColumnOrName]&quot;</span><span class="p">,</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">])</span> <span class="o">+</span> <span class="nb">list</span><span class="p">(</span><span class="n">part_cols</span><span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">unboundedFollowing</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">stat_func</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">row_number</span><span class="p">()</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window1</span><span class="p">))</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window2</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">DoubleType</span><span class="p">()))</span>
<div class="viewcode-block" id="Series.filter"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.filter.html#pyspark.pandas.Series.filter">[docs]</a> <span class="k">def</span> <span class="nf">filter</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">items</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Sequence</span><span class="p">[</span><span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">like</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">regex</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span>
<span class="k">if</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Series does not support columns axis.&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">items</span><span class="o">=</span><span class="n">items</span><span class="p">,</span> <span class="n">like</span><span class="o">=</span><span class="n">like</span><span class="p">,</span> <span class="n">regex</span><span class="o">=</span><span class="n">regex</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<span class="nb">filter</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="o">.</span><span class="n">filter</span><span class="o">.</span><span class="vm">__doc__</span>
<div class="viewcode-block" id="Series.describe"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.describe.html#pyspark.pandas.Series.describe">[docs]</a> <span class="k">def</span> <span class="nf">describe</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">percentiles</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">float</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">percentiles</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<span class="n">describe</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="o">.</span><span class="n">describe</span><span class="o">.</span><span class="vm">__doc__</span>
<div class="viewcode-block" id="Series.diff"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.diff.html#pyspark.pandas.Series.diff">[docs]</a> <span class="k">def</span> <span class="nf">diff</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">periods</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> First discrete difference of element.</span>
<span class="sd"> Calculates the difference of a Series element compared with another element in the</span>
<span class="sd"> Series (default is the element in the previous row).</span>
<span class="sd"> .. note:: the current implementation of diff uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to moving all data into</span>
<span class="sd"> a single partition on a single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid this method with very large datasets.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> periods : int, default 1</span>
<span class="sd"> Periods to shift for calculating difference, accepts negative values.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> diffed : Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&#39;a&#39;: [1, 2, 3, 4, 5, 6],</span>
<span class="sd"> ... &#39;b&#39;: [1, 1, 2, 3, 5, 8],</span>
<span class="sd"> ... &#39;c&#39;: [1, 4, 9, 16, 25, 36]}, columns=[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;])</span>
<span class="sd"> &gt;&gt;&gt; df</span>
<span class="sd"> a b c</span>
<span class="sd"> 0 1 1 1</span>
<span class="sd"> 1 2 1 4</span>
<span class="sd"> 2 3 2 9</span>
<span class="sd"> 3 4 3 16</span>
<span class="sd"> 4 5 5 25</span>
<span class="sd"> 5 6 8 36</span>
<span class="sd"> &gt;&gt;&gt; df.b.diff()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 0.0</span>
<span class="sd"> 2 1.0</span>
<span class="sd"> 3 1.0</span>
<span class="sd"> 4 2.0</span>
<span class="sd"> 5 3.0</span>
<span class="sd"> Name: b, dtype: float64</span>
<span class="sd"> Difference with 3rd previous value</span>
<span class="sd"> &gt;&gt;&gt; df.c.diff(periods=3)</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> 3 15.0</span>
<span class="sd"> 4 21.0</span>
<span class="sd"> 5 27.0</span>
<span class="sd"> Name: c, dtype: float64</span>
<span class="sd"> Difference with following value</span>
<span class="sd"> &gt;&gt;&gt; df.c.diff(periods=-1)</span>
<span class="sd"> 0 -3.0</span>
<span class="sd"> 1 -5.0</span>
<span class="sd"> 2 -7.0</span>
<span class="sd"> 3 -9.0</span>
<span class="sd"> 4 -11.0</span>
<span class="sd"> 5 NaN</span>
<span class="sd"> Name: c, dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_diff</span><span class="p">(</span><span class="n">periods</span><span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">analyzed</span></div>
<span class="k">def</span> <span class="nf">_diff</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">periods</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">part_cols</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">())</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">periods</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;periods should be an int; however, got [</span><span class="si">%s</span><span class="s2">]&quot;</span> <span class="o">%</span> <span class="nb">type</span><span class="p">(</span><span class="n">periods</span><span class="p">)</span><span class="o">.</span><span class="vm">__name__</span><span class="p">)</span>
<span class="n">window</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="o">-</span><span class="n">periods</span><span class="p">,</span> <span class="o">-</span><span class="n">periods</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="o">-</span> <span class="n">F</span><span class="o">.</span><span class="n">lag</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="n">periods</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">field</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">nullable</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span>
<div class="viewcode-block" id="Series.idxmax"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.idxmax.html#pyspark.pandas.Series.idxmax">[docs]</a> <span class="k">def</span> <span class="nf">idxmax</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Tuple</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the row label of the maximum value.</span>
<span class="sd"> If multiple values equal the maximum, the first row label with that</span>
<span class="sd"> value is returned.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> skipna : bool, default True</span>
<span class="sd"> Exclude NA/null values. If the entire Series is NA, the result</span>
<span class="sd"> will be NA.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Index</span>
<span class="sd"> Label of the maximum value.</span>
<span class="sd"> Raises</span>
<span class="sd"> ------</span>
<span class="sd"> ValueError</span>
<span class="sd"> If the Series is empty.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.idxmin : Return index *label* of the first occurrence</span>
<span class="sd"> of minimum of values.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data=[1, None, 4, 3, 5],</span>
<span class="sd"> ... index=[&#39;A&#39;, &#39;B&#39;, &#39;C&#39;, &#39;D&#39;, &#39;E&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> A 1.0</span>
<span class="sd"> B NaN</span>
<span class="sd"> C 4.0</span>
<span class="sd"> D 3.0</span>
<span class="sd"> E 5.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.idxmax()</span>
<span class="sd"> &#39;E&#39;</span>
<span class="sd"> If `skipna` is False and there is an NA value in the data,</span>
<span class="sd"> the function returns ``nan``.</span>
<span class="sd"> &gt;&gt;&gt; s.idxmax(skipna=False)</span>
<span class="sd"> nan</span>
<span class="sd"> In case of multi-index, you get a tuple:</span>
<span class="sd"> &gt;&gt;&gt; index = pd.MultiIndex.from_arrays([</span>
<span class="sd"> ... [&#39;a&#39;, &#39;a&#39;, &#39;b&#39;, &#39;b&#39;], [&#39;c&#39;, &#39;d&#39;, &#39;e&#39;, &#39;f&#39;]], names=(&#39;first&#39;, &#39;second&#39;))</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data=[1, None, 4, 5], index=index)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> first second</span>
<span class="sd"> a c 1.0</span>
<span class="sd"> d NaN</span>
<span class="sd"> b e 4.0</span>
<span class="sd"> f 5.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.idxmax()</span>
<span class="sd"> (&#39;b&#39;, &#39;f&#39;)</span>
<span class="sd"> If multiple values equal the maximum, the first row label with that</span>
<span class="sd"> value is returned.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 100, 1, 100, 1, 100], index=[10, 3, 5, 2, 1, 8])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 10 1</span>
<span class="sd"> 3 100</span>
<span class="sd"> 5 1</span>
<span class="sd"> 2 100</span>
<span class="sd"> 1 1</span>
<span class="sd"> 8 100</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.idxmax()</span>
<span class="sd"> 3</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">index_scols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span>
<span class="k">if</span> <span class="n">skipna</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">desc_nulls_last</span><span class="p">(),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">desc_nulls_first</span><span class="p">(),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="n">results</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">([</span><span class="n">scol</span><span class="p">]</span> <span class="o">+</span> <span class="n">index_scols</span><span class="p">)</span><span class="o">.</span><span class="n">take</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">results</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;attempt to get idxmin of an empty sequence&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># This will only happen when skipna is False because we will</span>
<span class="c1"># place nulls first.</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="n">values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">:])</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">values</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">return</span> <span class="n">values</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">values</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.idxmin"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.idxmin.html#pyspark.pandas.Series.idxmin">[docs]</a> <span class="k">def</span> <span class="nf">idxmin</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Tuple</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the row label of the minimum value.</span>
<span class="sd"> If multiple values equal the minimum, the first row label with that</span>
<span class="sd"> value is returned.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> skipna : bool, default True</span>
<span class="sd"> Exclude NA/null values. If the entire Series is NA, the result</span>
<span class="sd"> will be NA.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Index</span>
<span class="sd"> Label of the minimum value.</span>
<span class="sd"> Raises</span>
<span class="sd"> ------</span>
<span class="sd"> ValueError</span>
<span class="sd"> If the Series is empty.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.idxmax : Return index *label* of the first occurrence</span>
<span class="sd"> of maximum of values.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> This method is the Series version of ``ndarray.argmin``. This method</span>
<span class="sd"> returns the label of the minimum, while ``ndarray.argmin`` returns</span>
<span class="sd"> the position. To get the position, use ``series.values.argmin()``.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data=[1, None, 4, 0],</span>
<span class="sd"> ... index=[&#39;A&#39;, &#39;B&#39;, &#39;C&#39;, &#39;D&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> A 1.0</span>
<span class="sd"> B NaN</span>
<span class="sd"> C 4.0</span>
<span class="sd"> D 0.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.idxmin()</span>
<span class="sd"> &#39;D&#39;</span>
<span class="sd"> If `skipna` is False and there is an NA value in the data,</span>
<span class="sd"> the function returns ``nan``.</span>
<span class="sd"> &gt;&gt;&gt; s.idxmin(skipna=False)</span>
<span class="sd"> nan</span>
<span class="sd"> In case of multi-index, you get a tuple:</span>
<span class="sd"> &gt;&gt;&gt; index = pd.MultiIndex.from_arrays([</span>
<span class="sd"> ... [&#39;a&#39;, &#39;a&#39;, &#39;b&#39;, &#39;b&#39;], [&#39;c&#39;, &#39;d&#39;, &#39;e&#39;, &#39;f&#39;]], names=(&#39;first&#39;, &#39;second&#39;))</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data=[1, None, 4, 0], index=index)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> first second</span>
<span class="sd"> a c 1.0</span>
<span class="sd"> d NaN</span>
<span class="sd"> b e 4.0</span>
<span class="sd"> f 0.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.idxmin()</span>
<span class="sd"> (&#39;b&#39;, &#39;f&#39;)</span>
<span class="sd"> If multiple values equal the minimum, the first row label with that</span>
<span class="sd"> value is returned.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 100, 1, 100, 1, 100], index=[10, 3, 5, 2, 1, 8])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 10 1</span>
<span class="sd"> 3 100</span>
<span class="sd"> 5 1</span>
<span class="sd"> 2 100</span>
<span class="sd"> 1 1</span>
<span class="sd"> 8 100</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.idxmin()</span>
<span class="sd"> 10</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">index_scols</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span>
<span class="k">if</span> <span class="n">skipna</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">asc_nulls_last</span><span class="p">(),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">scol</span><span class="o">.</span><span class="n">asc_nulls_first</span><span class="p">(),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="n">results</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">([</span><span class="n">scol</span><span class="p">]</span> <span class="o">+</span> <span class="n">index_scols</span><span class="p">)</span><span class="o">.</span><span class="n">take</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">results</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;attempt to get idxmin of an empty sequence&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="c1"># This will only happen when skipna is False because we will</span>
<span class="c1"># place nulls first.</span>
<span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="n">values</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">1</span><span class="p">:])</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">values</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">return</span> <span class="n">values</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">values</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.pop"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.pop.html#pyspark.pandas.Series.pop">[docs]</a> <span class="k">def</span> <span class="nf">pop</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="n">Name</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">Scalar</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return item and drop from series.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> item : label</span>
<span class="sd"> Label of index to be popped.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Value that is popped from series.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data=np.arange(3), index=[&#39;A&#39;, &#39;B&#39;, &#39;C&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> A 0</span>
<span class="sd"> B 1</span>
<span class="sd"> C 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.pop(&#39;A&#39;)</span>
<span class="sd"> 0</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> B 1</span>
<span class="sd"> C 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series(data=np.arange(3), index=[&#39;A&#39;, &#39;A&#39;, &#39;C&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> A 0</span>
<span class="sd"> A 1</span>
<span class="sd"> C 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.pop(&#39;A&#39;)</span>
<span class="sd"> A 0</span>
<span class="sd"> A 1</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> C 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> MultiIndex is also supported.</span>
<span class="sd"> &gt;&gt;&gt; midx = pd.MultiIndex([[&#39;lama&#39;, &#39;cow&#39;, &#39;falcon&#39;],</span>
<span class="sd"> ... [&#39;speed&#39;, &#39;weight&#39;, &#39;length&#39;]],</span>
<span class="sd"> ... [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd"> ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],</span>
<span class="sd"> ... index=midx)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> lama speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.pop(&#39;lama&#39;)</span>
<span class="sd"> speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> MultiIndex with several index levels is also supported.</span>
<span class="sd"> &gt;&gt;&gt; midx = pd.MultiIndex([[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;],</span>
<span class="sd"> ... [&#39;lama&#39;, &#39;cow&#39;, &#39;falcon&#39;],</span>
<span class="sd"> ... [&#39;speed&#39;, &#39;weight&#39;, &#39;length&#39;]],</span>
<span class="sd"> ... [[0, 0, 0, 0, 0, 0, 1, 1, 1],</span>
<span class="sd"> ... [0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd"> ... [0, 1, 2, 0, 1, 2, 0, 0, 2]]</span>
<span class="sd"> ... )</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],</span>
<span class="sd"> ... index=midx)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> a lama speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> b falcon speed 320.0</span>
<span class="sd"> speed 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.pop((&#39;a&#39;, &#39;lama&#39;))</span>
<span class="sd"> speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> a cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> b falcon speed 320.0</span>
<span class="sd"> speed 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.pop((&#39;b&#39;, &#39;falcon&#39;, &#39;speed&#39;))</span>
<span class="sd"> (b, falcon, speed) 320.0</span>
<span class="sd"> (b, falcon, speed) 1.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_name_like_value</span><span class="p">(</span><span class="n">item</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;&#39;key&#39; should be string or tuple that contains strings&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_name_like_tuple</span><span class="p">(</span><span class="n">item</span><span class="p">):</span>
<span class="n">item</span> <span class="o">=</span> <span class="p">(</span><span class="n">item</span><span class="p">,)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_level</span> <span class="o">&lt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span>
<span class="s2">&quot;Key length (</span><span class="si">{}</span><span class="s2">) exceeds index depth (</span><span class="si">{}</span><span class="s2">)&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_level</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span>
<span class="n">scols</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="p">:]</span> <span class="o">+</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">]</span>
<span class="n">rows</span> <span class="o">=</span> <span class="p">[</span><span class="n">internal</span><span class="o">.</span><span class="n">spark_columns</span><span class="p">[</span><span class="n">level</span><span class="p">]</span> <span class="o">==</span> <span class="n">index</span> <span class="k">for</span> <span class="n">level</span><span class="p">,</span> <span class="n">index</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">item</span><span class="p">)]</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">rows</span><span class="p">))</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">scols</span><span class="p">)</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_drop</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_update_anchor</span><span class="p">(</span><span class="n">psdf</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_level</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">):</span>
<span class="c1"># if exactly one row matches the full key, return its value as a scalar instead of a frame</span>
<span class="n">pdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">toPandas</span><span class="p">()</span>
<span class="n">length</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">pdf</span><span class="p">)</span>
<span class="k">if</span> <span class="n">length</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">val</span> <span class="o">=</span> <span class="n">pdf</span><span class="p">[</span><span class="n">internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">,</span> <span class="n">CategoricalDtype</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="o">.</span><span class="n">categories</span><span class="p">[</span><span class="n">val</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">val</span>
<span class="n">item_string</span> <span class="o">=</span> <span class="n">name_like_string</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">withColumn</span><span class="p">(</span><span class="n">SPARK_DEFAULT_INDEX_NAME</span><span class="p">,</span> <span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">item_string</span><span class="p">)))</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">SPARK_DEFAULT_INDEX_NAME</span><span class="p">)],</span>
<span class="n">column_labels</span><span class="o">=</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span>
<span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_column_names</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="p">:]</span>
<span class="p">],</span>
<span class="n">index_fields</span><span class="o">=</span><span class="n">internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="p">:],</span>
<span class="n">index_names</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_names</span><span class="p">[</span><span class="nb">len</span><span class="p">(</span><span class="n">item</span><span class="p">)</span> <span class="p">:],</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span></div>
<!--
  Highlighted source listing for the Python method Series.copy.
  The element id "Series.copy" is the anchor of this listing; the "[docs]" link points
  back to the pyspark.pandas.Series.copy API reference page.
  Python shown: copy(self, deep=True) wraps self._internal in a DataFrame and returns
  first_series(...) of that frame. The "deep" argument is not referenced in the body
  shown here; the docstring describes it as a dummy parameter kept to match pandas.
  (This comment is closed on the same line as the opening div so that no text or
  whitespace is added to the listing.)
--><div class="viewcode-block" id="Series.copy"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.copy.html#pyspark.pandas.Series.copy">[docs]</a> <span class="k">def</span> <span class="nf">copy</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">deep</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Make a copy of this object&#39;s indices and data.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> deep : bool, default True</span>
<span class="sd"> this parameter is not supported but just dummy parameter to match pandas.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> copy : Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2], index=[&quot;a&quot;, &quot;b&quot;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> a 1</span>
<span class="sd"> b 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s_copy = s.copy()</span>
<span class="sd"> &gt;&gt;&gt; s_copy</span>
<span class="sd"> a 1</span>
<span class="sd"> b 2</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="p">))</span></div>
<!--
  Highlighted source listing for the Python method Series.mode.
  The element id "Series.mode" is the anchor of this listing; the "[docs]" link points
  back to the pyspark.pandas.Series.mode API reference page.
  Python shown, in order:
    1. self.value_counts(dropna=dropna, sort=False) is computed and its internal Spark
       frame is taken.
    2. The largest count is obtained with ser_count.max().
    3. The Spark frame is filtered with the expression string "count == {}" formatted
       with that maximum (presumably "count" is the column produced by value_counts;
       this is not visible in this listing).
    4. The SPARK_DEFAULT_INDEX_NAME column is selected and aliased to
       SPARK_DEFAULT_SERIES_NAME.
    5. A new InternalFrame is built with index_spark_columns=None and
       column_labels=[None], and first_series(DataFrame(internal)) is returned.
  (This comment is closed on the same line as the opening div so that no text or
  whitespace is added to the listing.)
--><div class="viewcode-block" id="Series.mode"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.mode.html#pyspark.pandas.Series.mode">[docs]</a> <span class="k">def</span> <span class="nf">mode</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dropna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the mode(s) of the dataset.</span>
<span class="sd"> Always returns Series even if only one value is returned.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> dropna : bool, default True</span>
<span class="sd"> Don&#39;t consider counts of NaN/NaT.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Modes of the Series.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([0, 0, 1, 1, 1, np.nan, np.nan, np.nan])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 0.0</span>
<span class="sd"> 1 0.0</span>
<span class="sd"> 2 1.0</span>
<span class="sd"> 3 1.0</span>
<span class="sd"> 4 1.0</span>
<span class="sd"> 5 NaN</span>
<span class="sd"> 6 NaN</span>
<span class="sd"> 7 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.mode()</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> If there are several same modes, all items are shown</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3,</span>
<span class="sd"> ... np.nan, np.nan, np.nan])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 0.0</span>
<span class="sd"> 1 0.0</span>
<span class="sd"> 2 1.0</span>
<span class="sd"> 3 1.0</span>
<span class="sd"> 4 1.0</span>
<span class="sd"> 5 2.0</span>
<span class="sd"> 6 2.0</span>
<span class="sd"> 7 2.0</span>
<span class="sd"> 8 3.0</span>
<span class="sd"> 9 3.0</span>
<span class="sd"> 10 3.0</span>
<span class="sd"> 11 NaN</span>
<span class="sd"> 12 NaN</span>
<span class="sd"> 13 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.mode().sort_values() # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS</span>
<span class="sd"> &lt;BLANKLINE&gt;</span>
<span class="sd"> ... 1.0</span>
<span class="sd"> ... 2.0</span>
<span class="sd"> ... 3.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> With &#39;dropna&#39; set to &#39;False&#39;, we can also see NaN in the result</span>
<span class="sd"> &gt;&gt;&gt; s.mode(False).sort_values() # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS</span>
<span class="sd"> &lt;BLANKLINE&gt;</span>
<span class="sd"> ... 1.0</span>
<span class="sd"> ... 2.0</span>
<span class="sd"> ... 3.0</span>
<span class="sd"> ... NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">ser_count</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">value_counts</span><span class="p">(</span><span class="n">dropna</span><span class="o">=</span><span class="n">dropna</span><span class="p">,</span> <span class="n">sort</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="n">sdf_count</span> <span class="o">=</span> <span class="n">ser_count</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span>
<span class="n">most_value</span> <span class="o">=</span> <span class="n">ser_count</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
<span class="n">sdf_most_value</span> <span class="o">=</span> <span class="n">sdf_count</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="s2">&quot;count == </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">most_value</span><span class="p">))</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf_most_value</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="n">SPARK_DEFAULT_INDEX_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">SPARK_DEFAULT_SERIES_NAME</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="p">(</span><span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span> <span class="n">index_spark_columns</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">column_labels</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">])</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span></div>
<!--
  Highlighted source listing for the Python method Series.keys.
  The element id "Series.keys" is the anchor of this listing; the "[docs]" link points
  back to the pyspark.pandas.Series.keys API reference page.
  Python shown: keys(self) takes no arguments besides self and simply returns
  self.index; the docstring example is marked "doctest: +SKIP".
  (This comment is closed on the same line as the opening div so that no text or
  whitespace is added to the listing.)
--><div class="viewcode-block" id="Series.keys"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.keys.html#pyspark.pandas.Series.keys">[docs]</a> <span class="k">def</span> <span class="nf">keys</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;ps.Index&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return alias for index.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Index</span>
<span class="sd"> Index of the Series.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; midx = pd.MultiIndex([[&#39;lama&#39;, &#39;cow&#39;, &#39;falcon&#39;],</span>
<span class="sd"> ... [&#39;speed&#39;, &#39;weight&#39;, &#39;length&#39;]],</span>
<span class="sd"> ... [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd"> ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx)</span>
<span class="sd"> &gt;&gt;&gt; psser.keys() # doctest: +SKIP</span>
<span class="sd"> MultiIndex([( &#39;lama&#39;, &#39;speed&#39;),</span>
<span class="sd"> ( &#39;lama&#39;, &#39;weight&#39;),</span>
<span class="sd"> ( &#39;lama&#39;, &#39;length&#39;),</span>
<span class="sd"> ( &#39;cow&#39;, &#39;speed&#39;),</span>
<span class="sd"> ( &#39;cow&#39;, &#39;weight&#39;),</span>
<span class="sd"> ( &#39;cow&#39;, &#39;length&#39;),</span>
<span class="sd"> (&#39;falcon&#39;, &#39;speed&#39;),</span>
<span class="sd"> (&#39;falcon&#39;, &#39;weight&#39;),</span>
<span class="sd"> (&#39;falcon&#39;, &#39;length&#39;)],</span>
<span class="sd"> )</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span></div>
<span class="c1"># TODO: &#39;regex&#39;, &#39;method&#39; parameter</span>
<!--
  Highlighted source listing for the Python method Series.replace.
  The element id "Series.replace" is the anchor of this listing; the "[docs]" link
  points back to the pyspark.pandas.Series.replace API reference page.
  Python shown, in order:
    1. to_replace is None: returns self.fillna(method="ffill").
    2. to_replace not one of str, list, tuple, dict, int, float: raises TypeError.
    3. regex truthy: raises NotImplementedError.
    4. Tuples passed as to_replace or value are converted to lists.
    5. When both are lists, their lengths must match (ValueError otherwise) and they
       are zipped into a dict mapping each old value to its replacement.
    6. Dict case: an empty dict leaves the Spark column unchanged; otherwise one
       F.when(...) clause is chained per entry, using an "is NaN or is null" condition
       for keys where pd.isna(key) is true and an equality test otherwise, finished
       with otherwise(self.spark.column).
    7. Non-dict case: the condition is column.isin(to_replace), widened with the
       NaN/null test when any element of to_replace is NA; matching rows get value.
    8. The result is returned through self._with_new_scol(current).
  (This comment is closed on the same line as the opening div so that no text or
  whitespace is added to the listing.)
--><div class="viewcode-block" id="Series.replace"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.replace.html#pyspark.pandas.Series.replace">[docs]</a> <span class="k">def</span> <span class="nf">replace</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">to_replace</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Any</span><span class="p">,</span> <span class="n">List</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">,</span> <span class="n">Dict</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">value</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">List</span><span class="p">,</span> <span class="n">Tuple</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">regex</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Replace values given in to_replace with value.</span>
<span class="sd"> Values of the Series are replaced with other values dynamically.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> to_replace : str, list, tuple, dict, Series, int, float, or None</span>
<span class="sd"> How to find the values that will be replaced.</span>
<span class="sd"> * numeric, str:</span>
<span class="sd"> - numeric: numeric values equal to to_replace will be replaced with value</span>
<span class="sd"> - str: string exactly matching to_replace will be replaced with value</span>
<span class="sd"> * list of str or numeric:</span>
<span class="sd"> - if to_replace and value are both lists or tuples, they must be the same length.</span>
<span class="sd"> - str and numeric rules apply as above.</span>
<span class="sd"> * dict:</span>
<span class="sd"> - Dicts can be used to specify different replacement values for different</span>
<span class="sd"> existing values.</span>
<span class="sd"> For example, {&#39;a&#39;: &#39;b&#39;, &#39;y&#39;: &#39;z&#39;} replaces the value ‘a’ with ‘b’ and ‘y’</span>
<span class="sd"> with ‘z’. To use a dict in this way the value parameter should be None.</span>
<span class="sd"> - For a DataFrame a dict can specify that different values should be replaced</span>
<span class="sd"> in different columns. For example, {&#39;a&#39;: 1, &#39;b&#39;: &#39;z&#39;} looks for the value 1</span>
<span class="sd"> in column ‘a’ and the value ‘z’ in column ‘b’ and replaces these values with</span>
<span class="sd"> whatever is specified in value.</span>
<span class="sd"> The value parameter should not be None in this case.</span>
<span class="sd"> You can treat this as a special case of passing two lists except that you are</span>
<span class="sd"> specifying the column to search in.</span>
<span class="sd"> See the examples section for examples of each of these.</span>
<span class="sd"> value : scalar, dict, list, tuple, str default None</span>
<span class="sd"> Value to replace any values matching to_replace with.</span>
<span class="sd"> For a DataFrame a dict of values can be used to specify which value to use</span>
<span class="sd"> for each column (columns not in the dict will not be filled).</span>
<span class="sd"> Regular expressions, strings and lists or dicts of such objects are also allowed.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Object after replacement.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Scalar `to_replace` and `value`</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([0, 1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> 4 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.replace(0, 5)</span>
<span class="sd"> 0 5</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> 4 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> List-like `to_replace`</span>
<span class="sd"> &gt;&gt;&gt; s.replace([0, 4], 5000)</span>
<span class="sd"> 0 5000</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> 4 5000</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.replace([1, 2, 3], [10, 20, 30])</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 10</span>
<span class="sd"> 2 20</span>
<span class="sd"> 3 30</span>
<span class="sd"> 4 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Dict-like `to_replace`</span>
<span class="sd"> &gt;&gt;&gt; s.replace({1: 1000, 2: 2000, 3: 3000, 4: 4000})</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 1000</span>
<span class="sd"> 2 2000</span>
<span class="sd"> 3 3000</span>
<span class="sd"> 4 4000</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Also support for MultiIndex</span>
<span class="sd"> &gt;&gt;&gt; midx = pd.MultiIndex([[&#39;lama&#39;, &#39;cow&#39;, &#39;falcon&#39;],</span>
<span class="sd"> ... [&#39;speed&#39;, &#39;weight&#39;, &#39;length&#39;]],</span>
<span class="sd"> ... [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd"> ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],</span>
<span class="sd"> ... index=midx)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> lama speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.replace(45, 450)</span>
<span class="sd"> lama speed 450.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.replace([45, 30, 320], 500)</span>
<span class="sd"> lama speed 500.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 500.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 500.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.replace({45: 450, 30: 300})</span>
<span class="sd"> lama speed 450.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> cow speed 300.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">to_replace</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="s2">&quot;ffill&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="nb">list</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">,</span> <span class="nb">dict</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">float</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;&#39;to_replace&#39; should be one of str, list, tuple, dict, int, float&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">regex</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;replace currently not support for regex&quot;</span><span class="p">)</span>
<span class="n">to_replace</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="k">else</span> <span class="n">to_replace</span>
<span class="n">value</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">value</span><span class="p">)</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">)</span> <span class="k">else</span> <span class="n">value</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">list</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">value</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span>
<span class="s2">&quot;Replacement lists must match in length. Expecting </span><span class="si">{}</span><span class="s2"> got </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">),</span> <span class="nb">len</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="n">to_replace</span> <span class="o">=</span> <span class="p">{</span><span class="n">k</span><span class="p">:</span> <span class="n">v</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="n">value</span><span class="p">)}</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">to_replace</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="n">is_start</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">current</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">for</span> <span class="n">to_replace_</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">to_replace</span><span class="o">.</span><span class="n">items</span><span class="p">():</span>
<span class="n">cond</span> <span class="o">=</span> <span class="p">(</span>
<span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">isnan</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span> <span class="o">|</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">())</span>
<span class="k">if</span> <span class="n">pd</span><span class="o">.</span><span class="n">isna</span><span class="p">(</span><span class="n">to_replace_</span><span class="p">)</span>
<span class="k">else</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span> <span class="o">==</span> <span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">to_replace_</span><span class="p">))</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">is_start</span><span class="p">:</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
<span class="n">is_start</span> <span class="o">=</span> <span class="kc">False</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">current</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">current</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">cond</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isin</span><span class="p">(</span><span class="n">to_replace</span><span class="p">)</span>
<span class="c1"># to_replace may be a scalar</span>
<span class="k">if</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">isna</span><span class="p">(</span><span class="n">to_replace</span><span class="p">))</span><span class="o">.</span><span class="n">any</span><span class="p">():</span>
<span class="n">cond</span> <span class="o">=</span> <span class="n">cond</span> <span class="o">|</span> <span class="n">F</span><span class="o">.</span><span class="n">isnan</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span> <span class="o">|</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">()</span>
<span class="n">current</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="n">value</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">current</span><span class="p">)</span> <span class="c1"># TODO: dtype?</span></div>
<div class="viewcode-block" id="Series.update"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.update.html#pyspark.pandas.Series.update">[docs]</a> <span class="k">def</span> <span class="nf">update</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Modify Series in place using non-NA values from passed Series. Aligns on index.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.pandas.config import set_option, reset_option</span>
<span class="sd"> &gt;&gt;&gt; set_option(&quot;compute.ops_on_diff_frames&quot;, True)</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s.update(ps.Series([4, 5, 6]))</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 5</span>
<span class="sd"> 2 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;a&#39;, &#39;b&#39;, &#39;c&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s.update(ps.Series([&#39;d&#39;, &#39;e&#39;], index=[0, 2]))</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 0 d</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 e</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s.update(ps.Series([4, 5, 6, 7, 8]))</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 0 4</span>
<span class="sd"> 1 5</span>
<span class="sd"> 2 6</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3], index=[10, 11, 12])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 10 1</span>
<span class="sd"> 11 2</span>
<span class="sd"> 12 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.update(ps.Series([4, 5, 6]))</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 10 1</span>
<span class="sd"> 11 2</span>
<span class="sd"> 12 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.update(ps.Series([4, 5, 6], index=[11, 12, 13]))</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 10 1</span>
<span class="sd"> 11 4</span>
<span class="sd"> 12 5</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> If ``other`` contains NaNs the corresponding values are not updated</span>
<span class="sd"> in the original Series.</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s.update(ps.Series([4, np.nan, 6]))</span>
<span class="sd"> &gt;&gt;&gt; s.sort_index()</span>
<span class="sd"> 0 4.0</span>
<span class="sd"> 1 2.0</span>
<span class="sd"> 2 6.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; reset_option(&quot;compute.ops_on_diff_frames&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Series</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;&#39;other&#39; must be a Series&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="n">scol</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">(),</span> <span class="n">other</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_name_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">))</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_spark_column</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">,</span> <span class="n">scol</span> <span class="c1"># TODO: dtype?</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_update_internal_frame</span><span class="p">(</span><span class="n">internal</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">combine_frames</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">_psdf</span><span class="p">,</span> <span class="n">how</span><span class="o">=</span><span class="s2">&quot;leftouter&quot;</span><span class="p">)</span>
<span class="n">this_scol</span> <span class="o">=</span> <span class="n">combined</span><span class="p">[</span><span class="s2">&quot;this&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">that_scol</span> <span class="o">=</span> <span class="n">combined</span><span class="p">[</span><span class="s2">&quot;that&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_for</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">that_scol</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">(),</span> <span class="n">that_scol</span><span class="p">)</span>
<span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">this_scol</span><span class="p">)</span>
<span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_name_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">))</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">combined</span><span class="p">[</span><span class="s2">&quot;this&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_spark_column</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">,</span> <span class="n">scol</span> <span class="c1"># TODO: dtype?</span>
<span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_update_internal_frame</span><span class="p">(</span><span class="n">internal</span><span class="o">.</span><span class="n">resolved_copy</span><span class="p">,</span> <span class="n">requires_same_anchor</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.where"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.where.html#pyspark.pandas.Series.where">[docs]</a> <span class="k">def</span> <span class="nf">where</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cond</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Replace values where the condition is False.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> cond : boolean Series</span>
<span class="sd"> Where cond is True, keep the original value. Where False,</span>
<span class="sd"> replace with corresponding value from other.</span>
<span class="sd"> other : scalar, Series</span>
<span class="sd"> Entries where cond is False are replaced with corresponding value from other.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.pandas.config import set_option, reset_option</span>
<span class="sd"> &gt;&gt;&gt; set_option(&quot;compute.ops_on_diff_frames&quot;, True)</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([0, 1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([100, 200, 300, 400, 500])</span>
<span class="sd"> &gt;&gt;&gt; s1.where(s1 &gt; 0).sort_index()</span>
<span class="sd"> 0 NaN</span>
<span class="sd"> 1 1.0</span>
<span class="sd"> 2 2.0</span>
<span class="sd"> 3 3.0</span>
<span class="sd"> 4 4.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s1.where(s1 &gt; 1, 10).sort_index()</span>
<span class="sd"> 0 10</span>
<span class="sd"> 1 10</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> 4 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s1.where(s1 &gt; 1, s1 + 100).sort_index()</span>
<span class="sd"> 0 100</span>
<span class="sd"> 1 101</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> 4 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s1.where(s1 &gt; 1, s2).sort_index()</span>
<span class="sd"> 0 100</span>
<span class="sd"> 1 200</span>
<span class="sd"> 2 2</span>
<span class="sd"> 3 3</span>
<span class="sd"> 4 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; reset_option(&quot;compute.ops_on_diff_frames&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="n">Series</span><span class="p">)</span>
<span class="c1"># We should check the DataFrame from both `cond` and `other`.</span>
<span class="n">should_try_ops_on_diff_frame</span> <span class="o">=</span> <span class="ow">not</span> <span class="n">same_anchor</span><span class="p">(</span><span class="n">cond</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span> <span class="ow">or</span> <span class="p">(</span>
<span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Series</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">same_anchor</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">should_try_ops_on_diff_frame</span><span class="p">:</span>
<span class="c1"># Try to perform it with the &#39;compute.ops_on_diff_frames&#39; option.</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span>
<span class="n">tmp_cond_col</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">psdf</span><span class="p">,</span> <span class="s2">&quot;__tmp_cond_col__&quot;</span><span class="p">)</span>
<span class="n">tmp_other_col</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">psdf</span><span class="p">,</span> <span class="s2">&quot;__tmp_other_col__&quot;</span><span class="p">)</span>
<span class="n">psdf</span><span class="p">[</span><span class="n">tmp_cond_col</span><span class="p">]</span> <span class="o">=</span> <span class="n">cond</span>
<span class="n">psdf</span><span class="p">[</span><span class="n">tmp_other_col</span><span class="p">]</span> <span class="o">=</span> <span class="n">other</span>
<span class="c1"># The logic above produces a Spark DataFrame that looks like this:</span>
<span class="c1"># +-----------------+---+----------------+-----------------+</span>
<span class="c1"># |__index_level_0__| 0|__tmp_cond_col__|__tmp_other_col__|</span>
<span class="c1"># +-----------------+---+----------------+-----------------+</span>
<span class="c1"># | 0| 0| false| 100|</span>
<span class="c1"># | 1| 1| false| 200|</span>
<span class="c1"># | 3| 3| true| 400|</span>
<span class="c1"># | 2| 2| true| 300|</span>
<span class="c1"># | 4| 4| true| 500|</span>
<span class="c1"># +-----------------+---+----------------+-----------------+</span>
<span class="n">condition</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="n">psdf</span><span class="p">[</span><span class="n">tmp_cond_col</span><span class="p">]</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span>
<span class="p">)</span>
<span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">psdf</span><span class="p">[</span><span class="n">tmp_other_col</span><span class="p">]</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_columns</span><span class="p">(</span>
<span class="p">[</span><span class="n">condition</span><span class="p">],</span> <span class="n">column_labels</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Series</span><span class="p">):</span>
<span class="n">other</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">condition</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">cond</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">condition</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.mask"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.mask.html#pyspark.pandas.Series.mask">[docs]</a> <span class="k">def</span> <span class="nf">mask</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cond</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Any</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Replace values where the condition is True.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> cond : boolean Series</span>
<span class="sd"> Where cond is False, keep the original value. Where True,</span>
<span class="sd"> replace with corresponding value from other.</span>
<span class="sd"> other : scalar, Series</span>
<span class="sd"> Entries where cond is True are replaced with corresponding value from other.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.pandas.config import set_option, reset_option</span>
<span class="sd"> &gt;&gt;&gt; set_option(&quot;compute.ops_on_diff_frames&quot;, True)</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([0, 1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([100, 200, 300, 400, 500])</span>
<span class="sd"> &gt;&gt;&gt; s1.mask(s1 &gt; 0).sort_index()</span>
<span class="sd"> 0 0.0</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> 3 NaN</span>
<span class="sd"> 4 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s1.mask(s1 &gt; 1, 10).sort_index()</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 10</span>
<span class="sd"> 3 10</span>
<span class="sd"> 4 10</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s1.mask(s1 &gt; 1, s1 + 100).sort_index()</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 102</span>
<span class="sd"> 3 103</span>
<span class="sd"> 4 104</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s1.mask(s1 &gt; 1, s2).sort_index()</span>
<span class="sd"> 0 0</span>
<span class="sd"> 1 1</span>
<span class="sd"> 2 300</span>
<span class="sd"> 3 400</span>
<span class="sd"> 4 500</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; reset_option(&quot;compute.ops_on_diff_frames&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="o">~</span><span class="n">cond</span><span class="p">,</span> <span class="n">other</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.xs"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.xs.html#pyspark.pandas.Series.xs">[docs]</a> <span class="k">def</span> <span class="nf">xs</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">Name</span><span class="p">,</span> <span class="n">level</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return cross-section from the Series.</span>
<span class="sd"> This method takes a `key` argument to select data at a particular</span>
<span class="sd"> level of a MultiIndex.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> key : label or tuple of label</span>
<span class="sd"> Label contained in the index, or partially in a MultiIndex.</span>
<span class="sd"> level : object, defaults to first n levels (n=1 or len(key))</span>
<span class="sd"> In case of a key partially contained in a MultiIndex, indicate</span>
<span class="sd"> which levels are used. Levels can be referred by label or position.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Cross-section from the original Series</span>
<span class="sd"> corresponding to the selected index levels.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; midx = pd.MultiIndex([[&#39;a&#39;, &#39;b&#39;, &#39;c&#39;],</span>
<span class="sd"> ... [&#39;lama&#39;, &#39;cow&#39;, &#39;falcon&#39;],</span>
<span class="sd"> ... [&#39;speed&#39;, &#39;weight&#39;, &#39;length&#39;]],</span>
<span class="sd"> ... [[0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd"> ... [0, 0, 0, 1, 1, 1, 2, 2, 2],</span>
<span class="sd"> ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],</span>
<span class="sd"> ... index=midx)</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> a lama speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> b cow speed 30.0</span>
<span class="sd"> weight 250.0</span>
<span class="sd"> length 1.5</span>
<span class="sd"> c falcon speed 320.0</span>
<span class="sd"> weight 1.0</span>
<span class="sd"> length 0.3</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Get values at specified index</span>
<span class="sd"> &gt;&gt;&gt; s.xs(&#39;a&#39;)</span>
<span class="sd"> lama speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Get values at several indexes</span>
<span class="sd"> &gt;&gt;&gt; s.xs((&#39;a&#39;, &#39;lama&#39;))</span>
<span class="sd"> speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Get values at specified index and level</span>
<span class="sd"> &gt;&gt;&gt; s.xs(&#39;lama&#39;, level=1)</span>
<span class="sd"> a speed 45.0</span>
<span class="sd"> weight 200.0</span>
<span class="sd"> length 1.2</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="nb">tuple</span><span class="p">):</span>
<span class="n">key</span> <span class="o">=</span> <span class="p">(</span><span class="n">key</span><span class="p">,)</span>
<span class="k">if</span> <span class="n">level</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">level</span> <span class="o">=</span> <span class="mi">0</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span>
<span class="n">scols</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">[:</span><span class="n">level</span><span class="p">]</span>
<span class="o">+</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">[</span><span class="n">level</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="p">:]</span>
<span class="o">+</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">]</span>
<span class="p">)</span>
<span class="n">rows</span> <span class="o">=</span> <span class="p">[</span><span class="n">internal</span><span class="o">.</span><span class="n">spark_columns</span><span class="p">[</span><span class="n">lvl</span><span class="p">]</span> <span class="o">==</span> <span class="n">index</span> <span class="k">for</span> <span class="n">lvl</span><span class="p">,</span> <span class="n">index</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="n">level</span><span class="p">)]</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">reduce</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">,</span> <span class="n">y</span><span class="p">:</span> <span class="n">x</span> <span class="o">&amp;</span> <span class="n">y</span><span class="p">,</span> <span class="n">rows</span><span class="p">))</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">scols</span><span class="p">)</span>
<span class="k">if</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_level</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">key</span><span class="p">):</span>
<span class="c1"># If the key has one entry per index level and exactly one row matches, return the bare value rather than a Series.</span>
<span class="n">pdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">toPandas</span><span class="p">()</span>
<span class="n">length</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">pdf</span><span class="p">)</span>
<span class="k">if</span> <span class="n">length</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">return</span> <span class="n">pdf</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">index_spark_column_names</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">internal</span><span class="o">.</span><span class="n">index_spark_column_names</span><span class="p">[:</span><span class="n">level</span><span class="p">]</span>
<span class="o">+</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_column_names</span><span class="p">[</span><span class="n">level</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="p">:]</span>
<span class="p">)</span>
<span class="n">index_names</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_names</span><span class="p">[:</span><span class="n">level</span><span class="p">]</span> <span class="o">+</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_names</span><span class="p">[</span><span class="n">level</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="p">:]</span>
<span class="n">index_fields</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">[:</span><span class="n">level</span><span class="p">]</span> <span class="o">+</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">[</span><span class="n">level</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="p">:]</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">index_spark_column_names</span><span class="p">],</span>
<span class="n">index_names</span><span class="o">=</span><span class="n">index_names</span><span class="p">,</span>
<span class="n">index_fields</span><span class="o">=</span><span class="n">index_fields</span><span class="p">,</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span></div>
<div class="viewcode-block" id="Series.pct_change"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.pct_change.html#pyspark.pandas.Series.pct_change">[docs]</a> <span class="k">def</span> <span class="nf">pct_change</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">periods</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">1</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Percentage change between the current and a prior element.</span>
<span class="sd"> .. note:: the current implementation of this API uses Spark&#39;s Window without</span>
<span class="sd"> specifying partition specification. This leads to moving all data into</span>
<span class="sd"> a single partition on a single machine and could cause serious</span>
<span class="sd"> performance degradation. Avoid using this method on very large datasets.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> periods : int, default 1</span>
<span class="sd"> Periods to shift for forming percent change.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([90, 91, 85], index=[2, 4, 1])</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 2 90</span>
<span class="sd"> 4 91</span>
<span class="sd"> 1 85</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; psser.pct_change()</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> 4 0.011111</span>
<span class="sd"> 1 -0.065934</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; psser.sort_index().pct_change()</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 0.058824</span>
<span class="sd"> 4 0.011111</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; psser.pct_change(periods=2)</span>
<span class="sd"> 2 NaN</span>
<span class="sd"> 4 NaN</span>
<span class="sd"> 1 -0.055556</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">window</span> <span class="o">=</span> <span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span><span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="o">-</span><span class="n">periods</span><span class="p">,</span> <span class="o">-</span><span class="n">periods</span><span class="p">)</span>
<span class="n">prev_row</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">lag</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">periods</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">((</span><span class="n">scol</span> <span class="o">-</span> <span class="n">prev_row</span><span class="p">)</span> <span class="o">/</span> <span class="n">prev_row</span><span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">analyzed</span></div>
<div class="viewcode-block" id="Series.combine_first"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.combine_first.html#pyspark.pandas.Series.combine_first">[docs]</a> <span class="k">def</span> <span class="nf">combine_first</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Combine Series values, choosing the calling Series&#39;s values first.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series</span>
<span class="sd"> The value(s) to be combined with the `Series`.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> The result of combining the Series with the other object.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.combine : Perform elementwise operation on two Series</span>
<span class="sd"> using a given function.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Result index will be the union of the two indexes.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([1, np.nan])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([3, 4])</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.ops_on_diff_frames&quot;, True):</span>
<span class="sd"> ... s1.combine_first(s2)</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 1 4.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;`combine_first` only allows `Series` for parameter `other`&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="n">this</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">that</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">combined</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">combine_frames</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">,</span> <span class="n">other</span><span class="o">.</span><span class="n">_psdf</span><span class="p">)</span>
<span class="n">this</span> <span class="o">=</span> <span class="n">combined</span><span class="p">[</span><span class="s2">&quot;this&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_for</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="n">that</span> <span class="o">=</span> <span class="n">combined</span><span class="p">[</span><span class="s2">&quot;that&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_column_for</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">_column_label</span><span class="p">)</span>
<span class="c1"># If `self` has missing value, use value of `other`</span>
<span class="n">cond</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">this</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span> <span class="n">that</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">this</span><span class="p">)</span>
<span class="c1"># If `self` and `other` come from same frame, the anchor should be kept</span>
<span class="k">if</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">cond</span><span class="p">)</span> <span class="c1"># TODO: dtype?</span>
<span class="n">index_scols</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="o">*</span><span class="n">index_scols</span><span class="p">,</span> <span class="n">cond</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="p">)</span><span class="o">.</span><span class="n">distinct</span><span class="p">()</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_sdf</span><span class="p">(</span>
<span class="n">sdf</span><span class="p">,</span> <span class="n">index_fields</span><span class="o">=</span><span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">,</span> <span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">]</span> <span class="c1"># TODO: dtype?</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span></div>
<div class="viewcode-block" id="Series.dot"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.dot.html#pyspark.pandas.Series.dot">[docs]</a> <span class="k">def</span> <span class="nf">dot</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compute the dot product between the Series and the columns of other.</span>
<span class="sd"> This method computes the dot product between the Series and another</span>
<span class="sd"> one, or the Series and each column of a DataFrame.</span>
<span class="sd"> It can also be called using `self @ other` in Python &gt;= 3.5.</span>
<span class="sd"> .. note:: This API is slightly different from pandas when indexes from both Series</span>
<span class="sd"> are not aligned and config &#39;compute.eager_check&#39; is False. pandas raises an exception;</span>
<span class="sd"> however, pandas-on-Spark just proceeds and performs by ignoring mismatches with NaN</span>
<span class="sd"> permissively.</span>
<span class="sd"> &gt;&gt;&gt; pdf1 = pd.Series([1, 2, 3], index=[0, 1, 2])</span>
<span class="sd"> &gt;&gt;&gt; pdf2 = pd.Series([1, 2, 3], index=[0, 1, 3])</span>
<span class="sd"> &gt;&gt;&gt; pdf1.dot(pdf2) # doctest: +SKIP</span>
<span class="sd"> ...</span>
<span class="sd"> ValueError: matrices are not aligned</span>
<span class="sd"> &gt;&gt;&gt; psdf1 = ps.Series([1, 2, 3], index=[0, 1, 2])</span>
<span class="sd"> &gt;&gt;&gt; psdf2 = ps.Series([1, 2, 3], index=[0, 1, 3])</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.eager_check&quot;, False):</span>
<span class="sd"> ... psdf1.dot(psdf2) # doctest: +SKIP</span>
<span class="sd"> ...</span>
<span class="sd"> 5</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series, DataFrame.</span>
<span class="sd"> The other object to compute the dot product with its columns.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> scalar, Series</span>
<span class="sd"> Return the dot product of the Series and other if other is a</span>
<span class="sd"> Series, the Series of the dot product of Series and each column of</span>
<span class="sd"> other if other is a DataFrame.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> The Series and other have to share the same index if other is a Series</span>
<span class="sd"> or a DataFrame.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([0, 1, 2, 3])</span>
<span class="sd"> &gt;&gt;&gt; s.dot(s)</span>
<span class="sd"> 14</span>
<span class="sd"> &gt;&gt;&gt; s @ s</span>
<span class="sd"> 14</span>
<span class="sd"> &gt;&gt;&gt; psdf = ps.DataFrame({&#39;x&#39;: [0, 1, 2, 3], &#39;y&#39;: [0, -1, -2, -3]})</span>
<span class="sd"> &gt;&gt;&gt; psdf</span>
<span class="sd"> x y</span>
<span class="sd"> 0 0 0</span>
<span class="sd"> 1 1 -1</span>
<span class="sd"> 2 2 -2</span>
<span class="sd"> 3 3 -3</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.ops_on_diff_frames&quot;, True):</span>
<span class="sd"> ... s.dot(psdf)</span>
<span class="sd"> ...</span>
<span class="sd"> x 14</span>
<span class="sd"> y -14</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="k">if</span> <span class="n">get_option</span><span class="p">(</span><span class="s2">&quot;compute.eager_check&quot;</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">sort_values</span><span class="p">()</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span>
<span class="n">other</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">sort_values</span><span class="p">()</span>
<span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;matrices are not aligned&quot;</span><span class="p">)</span>
<span class="k">elif</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">len</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">index</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;matrices are not aligned&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">):</span>
<span class="n">other_copy</span><span class="p">:</span> <span class="n">DataFrame</span> <span class="o">=</span> <span class="n">other</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">column_labels</span> <span class="o">=</span> <span class="n">other_copy</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span>
<span class="n">self_column_label</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">other_copy</span><span class="p">,</span> <span class="s2">&quot;__self_column__&quot;</span><span class="p">)</span>
<span class="n">other_copy</span><span class="p">[</span><span class="n">self_column_label</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span>
<span class="n">self_psser</span> <span class="o">=</span> <span class="n">other_copy</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">self_column_label</span><span class="p">)</span>
<span class="n">product_pssers</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">cast</span><span class="p">(</span><span class="n">Series</span><span class="p">,</span> <span class="n">other_copy</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">label</span><span class="p">)</span> <span class="o">*</span> <span class="n">self_psser</span><span class="p">)</span> <span class="k">for</span> <span class="n">label</span> <span class="ow">in</span> <span class="n">column_labels</span>
<span class="p">]</span>
<span class="n">dot_product_psser</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span>
<span class="n">other_copy</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_columns</span><span class="p">(</span><span class="n">product_pssers</span><span class="p">,</span> <span class="n">column_labels</span><span class="o">=</span><span class="n">column_labels</span><span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">Series</span><span class="p">,</span> <span class="n">dot_product_psser</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">Series</span><span class="p">)</span>
<span class="k">return</span> <span class="p">(</span><span class="bp">self</span> <span class="o">*</span> <span class="n">other</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span></div>
<span class="k">def</span> <span class="fm">__matmul__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Matrix multiplication using binary `@` operator in Python&gt;=3.5.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<div class="viewcode-block" id="Series.repeat"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.repeat.html#pyspark.pandas.Series.repeat">[docs]</a> <span class="k">def</span> <span class="nf">repeat</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">repeats</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Repeat elements of a Series.</span>
<span class="sd"> Returns a new Series where each element of the current Series</span>
<span class="sd"> is repeated consecutively a given number of times.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> repeats : int or Series</span>
<span class="sd"> The number of repetitions for each element. This should be a</span>
<span class="sd"> non-negative integer. Repeating 0 times will return an empty</span>
<span class="sd"> Series.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Newly created Series with repeated elements.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Index.repeat : Equivalent function for Index.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;a&#39;, &#39;b&#39;, &#39;c&#39;])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 a</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 c</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; s.repeat(2)</span>
<span class="sd"> 0 a</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 c</span>
<span class="sd"> 0 a</span>
<span class="sd"> 1 b</span>
<span class="sd"> 2 c</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; ps.Series([1, 2, 3]).repeat(0)</span>
<span class="sd"> Series([], dtype: int64)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">repeats</span><span class="p">,</span> <span class="p">(</span><span class="nb">int</span><span class="p">,</span> <span class="n">Series</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;`repeats` argument must be integer or Series, but got </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">repeats</span><span class="p">))</span>
<span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">repeats</span><span class="p">,</span> <span class="n">Series</span><span class="p">):</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">repeats</span><span class="p">):</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span>
<span class="n">temp_repeats</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">psdf</span><span class="p">,</span> <span class="s2">&quot;__temp_repeats__&quot;</span><span class="p">)</span>
<span class="n">psdf</span><span class="p">[</span><span class="n">temp_repeats</span><span class="p">]</span> <span class="o">=</span> <span class="n">repeats</span>
<span class="k">return</span> <span class="p">(</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="n">psdf</span><span class="p">[</span><span class="n">temp_repeats</span><span class="p">])</span>
<span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">explode</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">array_repeat</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="n">repeats</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s2">&quot;int32&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name_like_string</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">))</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span> <span class="o">+</span> <span class="p">[</span><span class="n">scol</span><span class="p">])</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span>
<span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_column_names</span>
<span class="p">],</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">name_like_string</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">))],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">repeats</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;negative dimensions are not allowed&quot;</span><span class="p">)</span>
<span class="n">psdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="p">[[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]]</span>
<span class="k">if</span> <span class="n">repeats</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_filter</span><span class="p">(</span><span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">False</span><span class="p">))))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">cast</span><span class="p">(</span><span class="s2">&quot;ps.DataFrame&quot;</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">concat</span><span class="p">([</span><span class="n">psdf</span><span class="p">]</span> <span class="o">*</span> <span class="n">repeats</span><span class="p">)))</span></div>
<div class="viewcode-block" id="Series.asof"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.asof.html#pyspark.pandas.Series.asof">[docs]</a> <span class="k">def</span> <span class="nf">asof</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">where</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Any</span><span class="p">,</span> <span class="n">List</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Scalar</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the last row(s) without any NaNs before `where`.</span>
<span class="sd"> The last row (for each element in `where`, if list) without any</span>
<span class="sd"> NaN is taken.</span>
<span class="sd"> If there is no good value, NaN is returned.</span>
<span class="sd"> .. note:: This API is dependent on :meth:`Index.is_monotonic_increasing`</span>
<span class="sd"> which is expensive.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> where : index or array-like of indices</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> scalar or Series</span>
<span class="sd"> The return can be:</span>
<span class="sd"> * scalar : when `self` is a Series and `where` is a scalar</span>
<span class="sd"> * Series: when `self` is a Series and `where` is an array-like</span>
<span class="sd"> Return scalar or Series</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Indices are assumed to be sorted. Raises if this is not the case and config</span>
<span class="sd"> &#39;compute.eager_check&#39; is True. If &#39;compute.eager_check&#39; is False, pandas-on-Spark just</span>
<span class="sd"> proceeds and performs the operation, ignoring the order of the indices.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 10 1.0</span>
<span class="sd"> 20 2.0</span>
<span class="sd"> 30 NaN</span>
<span class="sd"> 40 4.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> A scalar `where`.</span>
<span class="sd"> &gt;&gt;&gt; s.asof(20)</span>
<span class="sd"> 2.0</span>
<span class="sd"> For a sequence `where`, a Series is returned. The first value is</span>
<span class="sd"> NaN, because the first element of `where` is before the first</span>
<span class="sd"> index value.</span>
<span class="sd"> &gt;&gt;&gt; s.asof([5, 20]).sort_index()</span>
<span class="sd"> 5 NaN</span>
<span class="sd"> 20 2.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> Missing values are not considered. The following is ``2.0``, not</span>
<span class="sd"> NaN, even though NaN is at the index location for ``30``.</span>
<span class="sd"> &gt;&gt;&gt; s.asof(30)</span>
<span class="sd"> 2.0</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, np.nan, 4], index=[10, 30, 20, 40])</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.eager_check&quot;, False):</span>
<span class="sd"> ... s.asof(20)</span>
<span class="sd"> ...</span>
<span class="sd"> 1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">should_return_series</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">MultiIndex</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;asof is not supported for a MultiIndex&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">where</span><span class="p">,</span> <span class="p">(</span><span class="n">ps</span><span class="o">.</span><span class="n">Index</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">Series</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">)):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;where cannot be an Index, Series or a DataFrame&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">get_option</span><span class="p">(</span><span class="s2">&quot;compute.eager_check&quot;</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">is_monotonic_increasing</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;asof requires a sorted index&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_list_like</span><span class="p">(</span><span class="n">where</span><span class="p">):</span>
<span class="n">should_return_series</span> <span class="o">=</span> <span class="kc">False</span>
<span class="n">where</span> <span class="o">=</span> <span class="p">[</span><span class="n">where</span><span class="p">]</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span>
<span class="n">index_scol</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">index_type</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_type_for</span><span class="p">(</span><span class="n">index_scol</span><span class="p">)</span>
<span class="n">spark_column</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_spark_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">monotonically_increasing_id_column</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span>
<span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="p">,</span> <span class="s2">&quot;__monotonically_increasing_id__&quot;</span>
<span class="p">)</span>
<span class="n">cond</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">F</span><span class="o">.</span><span class="n">max_by</span><span class="p">(</span>
<span class="n">spark_column</span><span class="p">,</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="p">(</span><span class="n">index_scol</span> <span class="o">&lt;=</span> <span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="n">index</span><span class="p">)</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">index_type</span><span class="p">))</span> <span class="o">&amp;</span> <span class="n">spark_column</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">()</span>
<span class="k">if</span> <span class="n">pd</span><span class="o">.</span><span class="n">notna</span><span class="p">(</span><span class="n">index</span><span class="p">)</span>
<span class="c1"># If index is nan and the value of the col is not null</span>
<span class="c1"># then return monotonically_increasing_id. This will let max_by</span>
<span class="c1"># return the last index value, which is the behaviour of pandas.</span>
<span class="k">else</span> <span class="n">spark_column</span><span class="o">.</span><span class="n">isNotNull</span><span class="p">(),</span>
<span class="n">monotonically_increasing_id_column</span><span class="p">,</span>
<span class="p">),</span>
<span class="p">)</span>
<span class="k">for</span> <span class="n">index</span> <span class="ow">in</span> <span class="n">where</span>
<span class="p">]</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">withColumn</span><span class="p">(</span>
<span class="n">monotonically_increasing_id_column</span><span class="p">,</span> <span class="n">F</span><span class="o">.</span><span class="n">monotonically_increasing_id</span><span class="p">()</span>
<span class="p">)</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">cond</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">should_return_series</span><span class="p">:</span>
<span class="k">with</span> <span class="n">sql_conf</span><span class="p">({</span><span class="n">SPARK_CONF_ARROW_ENABLED</span><span class="p">:</span> <span class="kc">False</span><span class="p">}):</span>
<span class="c1"># Disable Arrow to keep row ordering.</span>
<span class="n">result</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">toPandas</span><span class="p">()</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span>
<span class="k">return</span> <span class="n">result</span> <span class="k">if</span> <span class="n">result</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="c1"># The data is expected to be small so it&#39;s fine to transpose/use default index.</span>
<span class="k">with</span> <span class="n">ps</span><span class="o">.</span><span class="n">option_context</span><span class="p">(</span><span class="s2">&quot;compute.default_index_type&quot;</span><span class="p">,</span> <span class="s2">&quot;distributed&quot;</span><span class="p">,</span> <span class="s2">&quot;compute.max_rows&quot;</span><span class="p">,</span> <span class="mi">1</span><span class="p">):</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">where</span><span class="p">)</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="n">where</span><span class="p">))</span> <span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">index_type</span><span class="p">,</span> <span class="n">TimestampType</span><span class="p">):</span>
<span class="n">psdf</span><span class="p">:</span> <span class="n">DataFrame</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">sdf</span><span class="p">)</span>
<span class="n">psdf</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Index</span><span class="p">(</span><span class="n">where</span><span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">psdf</span><span class="o">.</span><span class="n">transpose</span><span class="p">())</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># If `where` has duplicate items, leverage the pandas directly</span>
<span class="c1"># since pandas API on Spark doesn&#39;t support the duplicate column name.</span>
<span class="n">pdf</span><span class="p">:</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">toPandas</span><span class="p">()</span>
<span class="n">pdf</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Index</span><span class="p">(</span><span class="n">where</span><span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">pdf</span><span class="o">.</span><span class="n">transpose</span><span class="p">()))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.mad"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.mad.html#pyspark.pandas.Series.mad">[docs]</a> <span class="k">def</span> <span class="nf">mad</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">float</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the mean absolute deviation of values.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4])</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.mad()</span>
<span class="sd"> 1.0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span>
<span class="n">spark_column</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">avg</span> <span class="o">=</span> <span class="n">unpack_scalar</span><span class="p">(</span><span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">avg</span><span class="p">(</span><span class="n">spark_column</span><span class="p">)))</span>
<span class="n">mad</span> <span class="o">=</span> <span class="n">unpack_scalar</span><span class="p">(</span><span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">avg</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">spark_column</span> <span class="o">-</span> <span class="n">avg</span><span class="p">))))</span>
<span class="k">return</span> <span class="n">mad</span></div>
<div class="viewcode-block" id="Series.unstack"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.unstack.html#pyspark.pandas.Series.unstack">[docs]</a> <span class="k">def</span> <span class="nf">unstack</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">level</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Unstack, a.k.a. pivot, Series with MultiIndex to produce DataFrame.</span>
<span class="sd"> The level involved will automatically get sorted.</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Unlike pandas, pandas-on-Spark doesn&#39;t check whether an index is duplicated or not</span>
<span class="sd"> because the checking of duplicated index requires scanning whole data which</span>
<span class="sd"> can be quite expensive.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> level : int, str, or list of these, default last level</span>
<span class="sd"> Level(s) to unstack, can pass level name.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> DataFrame</span>
<span class="sd"> Unstacked Series.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([1, 2, 3, 4],</span>
<span class="sd"> ... index=pd.MultiIndex.from_product([[&#39;one&#39;, &#39;two&#39;],</span>
<span class="sd"> ... [&#39;a&#39;, &#39;b&#39;]]))</span>
<span class="sd"> &gt;&gt;&gt; s</span>
<span class="sd"> one a 1</span>
<span class="sd"> b 2</span>
<span class="sd"> two a 3</span>
<span class="sd"> b 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; s.unstack(level=-1).sort_index()</span>
<span class="sd"> a b</span>
<span class="sd"> one 1 2</span>
<span class="sd"> two 3 4</span>
<span class="sd"> &gt;&gt;&gt; s.unstack(level=0).sort_index()</span>
<span class="sd"> one two</span>
<span class="sd"> a 1 3</span>
<span class="sd"> b 2 4</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">,</span> <span class="n">ps</span><span class="o">.</span><span class="n">MultiIndex</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Series.unstack only support for a MultiIndex&quot;</span><span class="p">)</span>
<span class="n">index_nlevels</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">nlevels</span>
<span class="k">if</span> <span class="n">level</span> <span class="o">&gt;</span> <span class="mi">0</span> <span class="ow">and</span> <span class="p">(</span><span class="n">level</span> <span class="o">&gt;</span> <span class="n">index_nlevels</span> <span class="o">-</span> <span class="mi">1</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">IndexError</span><span class="p">(</span>
<span class="s2">&quot;Too many levels: Index has only </span><span class="si">{}</span><span class="s2"> levels, not </span><span class="si">{}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">index_nlevels</span><span class="p">,</span> <span class="n">level</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">elif</span> <span class="n">level</span> <span class="o">&lt;</span> <span class="mi">0</span> <span class="ow">and</span> <span class="p">(</span><span class="n">level</span> <span class="o">&lt;</span> <span class="o">-</span><span class="n">index_nlevels</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">IndexError</span><span class="p">(</span>
<span class="s2">&quot;Too many levels: Index has only </span><span class="si">{}</span><span class="s2"> levels, </span><span class="si">{}</span><span class="s2"> is not a valid level number&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">index_nlevels</span><span class="p">,</span> <span class="n">level</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span>
<span class="n">index_map</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">zip</span><span class="p">(</span><span class="n">internal</span><span class="o">.</span><span class="n">index_spark_column_names</span><span class="p">,</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_names</span><span class="p">,</span> <span class="n">internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">pivot_col</span><span class="p">,</span> <span class="n">column_label_names</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">index_map</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="n">level</span><span class="p">)</span>
<span class="n">index_scol_names</span><span class="p">,</span> <span class="n">index_names</span><span class="p">,</span> <span class="n">index_fields</span> <span class="o">=</span> <span class="nb">zip</span><span class="p">(</span><span class="o">*</span><span class="n">index_map</span><span class="p">)</span>
<span class="n">col</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">spark_frame</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="n">index_scol_names</span><span class="p">))</span><span class="o">.</span><span class="n">pivot</span><span class="p">(</span><span class="n">pivot_col</span><span class="p">)</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">first</span><span class="p">(</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)))</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="n">index_scol_names</span><span class="p">],</span>
<span class="n">index_names</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="n">index_names</span><span class="p">),</span>
<span class="n">index_fields</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="n">index_fields</span><span class="p">),</span>
<span class="n">column_label_names</span><span class="o">=</span><span class="p">[</span><span class="n">column_label_names</span><span class="p">],</span>
<span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">internal</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span>
<span class="n">field</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
<span class="k">for</span> <span class="n">field</span> <span class="ow">in</span> <span class="n">internal</span><span class="o">.</span><span class="n">data_fields</span>
<span class="p">]</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">)</span></div>
<div class="viewcode-block" id="Series.item"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.item.html#pyspark.pandas.Series.item">[docs]</a> <span class="k">def</span> <span class="nf">item</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Scalar</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the first element of the underlying data as a Python scalar.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> scalar</span>
<span class="sd"> The first element of Series.</span>
<span class="sd"> Raises</span>
<span class="sd"> ------</span>
<span class="sd"> ValueError</span>
<span class="sd"> If the data is not length-1.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([10])</span>
<span class="sd"> &gt;&gt;&gt; psser.item()</span>
<span class="sd"> 10</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">head</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">()</span></div>
<div class="viewcode-block" id="Series.iteritems"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.iteritems.html#pyspark.pandas.Series.iteritems">[docs]</a> <span class="k">def</span> <span class="nf">iteritems</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Lazily iterate over (index, value) tuples.</span>
<span class="sd"> This method returns an iterable tuple (index, value). This is</span>
<span class="sd"> convenient if you want to create a lazy iterator.</span>
<span class="sd"> .. note:: Unlike pandas&#39;, the iteritems in pandas-on-Spark returns a generator rather</span>
<span class="sd"> than a zip object.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> iterable</span>
<span class="sd"> Iterable of tuples containing the (index, value) pairs from a</span>
<span class="sd"> Series.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> DataFrame.items : Iterate over (column name, Series) pairs.</span>
<span class="sd"> DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series([&#39;A&#39;, &#39;B&#39;, &#39;C&#39;])</span>
<span class="sd"> &gt;&gt;&gt; for index, value in s.items():</span>
<span class="sd"> ... print(&quot;Index : {}, Value : {}&quot;.format(index, value))</span>
<span class="sd"> Index : 0, Value : A</span>
<span class="sd"> Index : 1, Value : B</span>
<span class="sd"> Index : 2, Value : C</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">internal_index_columns</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_column_names</span>
<span class="n">internal_data_column</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">def</span> <span class="nf">extract_kv_from_spark_row</span><span class="p">(</span><span class="n">row</span><span class="p">:</span> <span class="n">Row</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">Any</span><span class="p">]:</span>
<span class="n">k</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">row</span><span class="p">[</span><span class="n">internal_index_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">internal_index_columns</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span>
<span class="k">else</span> <span class="nb">tuple</span><span class="p">(</span><span class="n">row</span><span class="p">[</span><span class="n">c</span><span class="p">]</span> <span class="k">for</span> <span class="n">c</span> <span class="ow">in</span> <span class="n">internal_index_columns</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">v</span> <span class="o">=</span> <span class="n">row</span><span class="p">[</span><span class="n">internal_data_column</span><span class="p">]</span>
<span class="k">return</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span>
<span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">map</span><span class="p">(</span>
<span class="n">extract_kv_from_spark_row</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">toLocalIterator</span><span class="p">()</span>
<span class="p">):</span>
<span class="k">yield</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span></div>
<div class="viewcode-block" id="Series.items"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.items.html#pyspark.pandas.Series.items">[docs]</a> <span class="k">def</span> <span class="nf">items</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">Tuple</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]:</span>
<span class="sd">&quot;&quot;&quot;This is an alias of ``iteritems``.&quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">iteritems</span><span class="p">()</span></div>
<!--
  Highlighted source listing for Series.droplevel (element id "Series.droplevel").
  The "[docs]" anchor links back to this method's API reference page.
  As listed, the method converts the Series with to_frame(), calls
  droplevel(level=level, axis=0) on the result, takes first_series(...) of it,
  and renames that series to self.name.
--><div class="viewcode-block" id="Series.droplevel"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.droplevel.html#pyspark.pandas.Series.droplevel">[docs]</a> <span class="k">def</span> <span class="nf">droplevel</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">level</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Name</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Name</span><span class="p">]]])</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return Series with requested index level(s) removed.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> level : int, str, or list-like</span>
<span class="sd"> If a string is given, must be the name of a level</span>
<span class="sd"> If list-like, elements must be names or positional indexes</span>
<span class="sd"> of levels.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Series with requested index level(s) removed.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series(</span>
<span class="sd"> ... [1, 2, 3],</span>
<span class="sd"> ... index=pd.MultiIndex.from_tuples(</span>
<span class="sd"> ... [(&quot;x&quot;, &quot;a&quot;), (&quot;x&quot;, &quot;b&quot;), (&quot;y&quot;, &quot;c&quot;)], names=[&quot;level_1&quot;, &quot;level_2&quot;]</span>
<span class="sd"> ... ),</span>
<span class="sd"> ... )</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> level_1 level_2</span>
<span class="sd"> x a 1</span>
<span class="sd"> b 2</span>
<span class="sd"> y c 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Removing specific index level by level</span>
<span class="sd"> &gt;&gt;&gt; psser.droplevel(0)</span>
<span class="sd"> level_2</span>
<span class="sd"> a 1</span>
<span class="sd"> b 2</span>
<span class="sd"> c 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> Removing specific index level by name</span>
<span class="sd"> &gt;&gt;&gt; psser.droplevel(&quot;level_2&quot;)</span>
<span class="sd"> level_1</span>
<span class="sd"> x 1</span>
<span class="sd"> x 2</span>
<span class="sd"> y 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">droplevel</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="n">level</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<!--
  Highlighted source listing for Series.tail (element id "Series.tail").
  The "[docs]" anchor links back to this method's API reference page.
  As listed, the method takes n (an int, default 5), calls tail(n=n) on
  self.to_frame(), takes first_series(...) of the result, and renames that
  series to self.name.
--><div class="viewcode-block" id="Series.tail"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.tail.html#pyspark.pandas.Series.tail">[docs]</a> <span class="k">def</span> <span class="nf">tail</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">n</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">5</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the last `n` rows.</span>
<span class="sd"> This function returns last `n` rows from the object based on</span>
<span class="sd"> position. It is useful for quickly verifying data, for example,</span>
<span class="sd"> after sorting or appending rows.</span>
<span class="sd"> For negative values of `n`, this function returns all rows except</span>
<span class="sd"> the first `n` rows, equivalent to ``df[n:]``.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> n : int, default 5</span>
<span class="sd"> Number of rows to select.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> type of caller</span>
<span class="sd"> The last `n` rows of the caller object.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> DataFrame.head : The first `n` rows of the caller object.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([1, 2, 3, 4, 5])</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 0 1</span>
<span class="sd"> 1 2</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> 4 5</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; psser.tail(3) # doctest: +SKIP</span>
<span class="sd"> 2 3</span>
<span class="sd"> 3 4</span>
<span class="sd"> 4 5</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">tail</span><span class="p">(</span><span class="n">n</span><span class="o">=</span><span class="n">n</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<!--
  Highlighted source listing for Series.explode (element id "Series.explode").
  The "[docs]" anchor links back to this method's API reference page.
  As listed, the method returns self.copy() when self.spark.data_type is not
  an ArrayType. Otherwise it builds an F.explode_outer(...) column aliased to
  name_like_string(self._column_label), passes it to
  self._internal.with_new_columns(..., keep_order=False), and returns
  first_series(DataFrame(internal)).
--><div class="viewcode-block" id="Series.explode"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.explode.html#pyspark.pandas.Series.explode">[docs]</a> <span class="k">def</span> <span class="nf">explode</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Transform each element of a list-like to a row.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Exploded lists to rows; index will be duplicated for these rows.</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> Series.str.split : Split string values on specified separator.</span>
<span class="sd"> Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex</span>
<span class="sd"> to produce DataFrame.</span>
<span class="sd"> DataFrame.melt : Unpivot a DataFrame from wide format to long format.</span>
<span class="sd"> DataFrame.explode : Explode a DataFrame from list-like</span>
<span class="sd"> columns to long format.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([[1, 2, 3], [], [3, 4]])</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 0 [1, 2, 3]</span>
<span class="sd"> 1 []</span>
<span class="sd"> 2 [3, 4]</span>
<span class="sd"> dtype: object</span>
<span class="sd"> &gt;&gt;&gt; psser.explode() # doctest: +SKIP</span>
<span class="sd"> 0 1.0</span>
<span class="sd"> 0 2.0</span>
<span class="sd"> 0 3.0</span>
<span class="sd"> 1 NaN</span>
<span class="sd"> 2 3.0</span>
<span class="sd"> 2 4.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">ArrayType</span><span class="p">):</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">explode_outer</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">name_like_string</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_column_label</span><span class="p">))</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_columns</span><span class="p">([</span><span class="n">scol</span><span class="p">],</span> <span class="n">keep_order</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span></div>
<!--
  Highlighted source listing for Series.argsort (element id "Series.argsort").
  The "[docs]" anchor links back to this method's API reference page.
  Steps shown in the listing:
    1. Select the non-null rows via self.loc[self.notnull()].
    2. Build sdf_for_index from the index columns and attach a distributed
       sequence column under a verified temporary join key name.
    3. Build sdf_for_data from the values plus the natural order column,
       attach a sequence column named SPARK_DEFAULT_SERIES_NAME, sort by
       values then natural order, drop both sort columns, and attach the
       temporary join key as a second sequence column.
    4. Join the two frames on the temporary key and drop that key.
    5. Wrap the joined frame in a new internal frame, then concatenate the
       resulting series with the null rows transformed to the literal -1.
  The ASCII tables inside the "c1" spans are comments from the listed source
  that illustrate the intermediate frames.
--><div class="viewcode-block" id="Series.argsort"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.argsort.html#pyspark.pandas.Series.argsort">[docs]</a> <span class="k">def</span> <span class="nf">argsort</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return the integer indices that would sort the Series values.</span>
<span class="sd"> Unlike pandas, the index order is not preserved in the result.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Positions of values within the sort order with -1 indicating</span>
<span class="sd"> nan values.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([3, 3, 4, 1, 6, 2, 3, 7, 8, 7, 10])</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 0 3</span>
<span class="sd"> 1 3</span>
<span class="sd"> 2 4</span>
<span class="sd"> 3 1</span>
<span class="sd"> 4 6</span>
<span class="sd"> 5 2</span>
<span class="sd"> 6 3</span>
<span class="sd"> 7 7</span>
<span class="sd"> 8 8</span>
<span class="sd"> 9 7</span>
<span class="sd"> 10 10</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; psser.argsort().sort_index()</span>
<span class="sd"> 0 3</span>
<span class="sd"> 1 5</span>
<span class="sd"> 2 0</span>
<span class="sd"> 3 1</span>
<span class="sd"> 4 6</span>
<span class="sd"> 5 2</span>
<span class="sd"> 6 4</span>
<span class="sd"> 7 7</span>
<span class="sd"> 8 9</span>
<span class="sd"> 9 8</span>
<span class="sd"> 10 10</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">notnull</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">notnull</span><span class="p">()]</span>
<span class="n">sdf_for_index</span> <span class="o">=</span> <span class="n">notnull</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">notnull</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span><span class="p">)</span>
<span class="n">tmp_join_key</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">sdf_for_index</span><span class="p">,</span> <span class="s2">&quot;__tmp_join_key__&quot;</span><span class="p">)</span>
<span class="n">sdf_for_index</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="o">.</span><span class="n">attach_distributed_sequence_column</span><span class="p">(</span>
<span class="n">sdf_for_index</span><span class="p">,</span> <span class="n">tmp_join_key</span>
<span class="p">)</span>
<span class="c1"># sdf_for_index:</span>
<span class="c1"># +----------------+-----------------+</span>
<span class="c1"># |__tmp_join_key__|__index_level_0__|</span>
<span class="c1"># +----------------+-----------------+</span>
<span class="c1"># | 0| 0|</span>
<span class="c1"># | 1| 1|</span>
<span class="c1"># | 2| 2|</span>
<span class="c1"># | 3| 3|</span>
<span class="c1"># | 4| 4|</span>
<span class="c1"># +----------------+-----------------+</span>
<span class="n">sdf_for_data</span> <span class="o">=</span> <span class="n">notnull</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="n">notnull</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="s2">&quot;values&quot;</span><span class="p">),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span>
<span class="p">)</span>
<span class="n">sdf_for_data</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="o">.</span><span class="n">attach_distributed_sequence_column</span><span class="p">(</span>
<span class="n">sdf_for_data</span><span class="p">,</span> <span class="n">SPARK_DEFAULT_SERIES_NAME</span>
<span class="p">)</span>
<span class="c1"># sdf_for_data:</span>
<span class="c1"># +---+------+-----------------+</span>
<span class="c1"># | 0|values|__natural_order__|</span>
<span class="c1"># +---+------+-----------------+</span>
<span class="c1"># | 0| 3| 25769803776|</span>
<span class="c1"># | 1| 3| 51539607552|</span>
<span class="c1"># | 2| 4| 77309411328|</span>
<span class="c1"># | 3| 1| 103079215104|</span>
<span class="c1"># | 4| 2| 128849018880|</span>
<span class="c1"># +---+------+-----------------+</span>
<span class="n">sdf_for_data</span> <span class="o">=</span> <span class="n">sdf_for_data</span><span class="o">.</span><span class="n">sort</span><span class="p">(</span>
<span class="n">scol_for</span><span class="p">(</span><span class="n">sdf_for_data</span><span class="p">,</span> <span class="s2">&quot;values&quot;</span><span class="p">),</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span>
<span class="p">)</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="s2">&quot;values&quot;</span><span class="p">,</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="n">tmp_join_key</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">sdf_for_data</span><span class="p">,</span> <span class="s2">&quot;__tmp_join_key__&quot;</span><span class="p">)</span>
<span class="n">sdf_for_data</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="o">.</span><span class="n">attach_distributed_sequence_column</span><span class="p">(</span><span class="n">sdf_for_data</span><span class="p">,</span> <span class="n">tmp_join_key</span><span class="p">)</span>
<span class="c1"># sdf_for_index: sdf_for_data:</span>
<span class="c1"># +----------------+-----------------+ +----------------+---+</span>
<span class="c1"># |__tmp_join_key__|__index_level_0__| |__tmp_join_key__| 0|</span>
<span class="c1"># +----------------+-----------------+ +----------------+---+</span>
<span class="c1"># | 0| 0| | 0| 3|</span>
<span class="c1"># | 1| 1| | 1| 4|</span>
<span class="c1"># | 2| 2| | 2| 0|</span>
<span class="c1"># | 3| 3| | 3| 1|</span>
<span class="c1"># | 4| 4| | 4| 2|</span>
<span class="c1"># +----------------+-----------------+ +----------------+---+</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf_for_index</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">sdf_for_data</span><span class="p">,</span> <span class="n">on</span><span class="o">=</span><span class="n">tmp_join_key</span><span class="p">)</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">tmp_join_key</span><span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_sdf</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">data_columns</span><span class="o">=</span><span class="p">[</span><span class="n">SPARK_DEFAULT_SERIES_NAME</span><span class="p">],</span>
<span class="n">index_fields</span><span class="o">=</span><span class="p">[</span>
<span class="n">InternalField</span><span class="p">(</span><span class="n">dtype</span><span class="o">=</span><span class="n">field</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span> <span class="k">for</span> <span class="n">field</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_fields</span>
<span class="p">],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">],</span>
<span class="p">)</span>
<span class="n">psser</span> <span class="o">=</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span>
<span class="n">Series</span><span class="p">,</span>
<span class="n">ps</span><span class="o">.</span><span class="n">concat</span><span class="p">([</span><span class="n">psser</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">isnull</span><span class="p">()]</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="k">lambda</span> <span class="n">_</span><span class="p">:</span> <span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">))]),</span>
<span class="p">)</span></div>
<!--
  Highlighted source listing for Series.argmax (element id "Series.argmax").
  The "[docs]" anchor links back to this method's API reference page.
  Steps shown in the listing:
    1. Select the data column and the natural order column into sdf.
    2. Aggregate F.max of the data column and F.first of the natural order
       column, and read the single result row with head().
    3. Raise ValueError("attempt to get argmax of an empty sequence") when
       the second aggregate is None; return -1 when the maximum is None.
    4. Otherwise drop the natural order column, attach a distributed
       sequence column, filter rows equal to the maximum, and return
       element 0 of the first matching row.
  This block mirrors the Series.argmin listing, with F.max in place of F.min.
--><div class="viewcode-block" id="Series.argmax"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.argmax.html#pyspark.pandas.Series.argmax">[docs]</a> <span class="k">def</span> <span class="nf">argmax</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return int position of the largest value in the Series.</span>
<span class="sd"> If the maximum is achieved in multiple locations,</span>
<span class="sd"> the first row position is returned.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> int</span>
<span class="sd"> Row position of the maximum value.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Consider dataset containing cereal calories</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series({&#39;Corn Flakes&#39;: 100.0, &#39;Almond Delight&#39;: 110.0,</span>
<span class="sd"> ... &#39;Cinnamon Toast Crunch&#39;: 120.0, &#39;Cocoa Puff&#39;: 110.0})</span>
<span class="sd"> &gt;&gt;&gt; s # doctest: +SKIP</span>
<span class="sd"> Corn Flakes 100.0</span>
<span class="sd"> Almond Delight 110.0</span>
<span class="sd"> Cinnamon Toast Crunch 120.0</span>
<span class="sd"> Cocoa Puff 110.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.argmax() # doctest: +SKIP</span>
<span class="sd"> 2</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="n">max_value</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])),</span>
<span class="n">F</span><span class="o">.</span><span class="n">first</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">()</span>
<span class="k">if</span> <span class="n">max_value</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;attempt to get argmax of an empty sequence&quot;</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">max_value</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="o">-</span><span class="mi">1</span>
<span class="c1"># We should remember the natural sequence started from 0</span>
<span class="n">seq_col_name</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="s2">&quot;__distributed_sequence_column__&quot;</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="o">.</span><span class="n">attach_distributed_sequence_column</span><span class="p">(</span>
<span class="n">sdf</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">),</span> <span class="n">seq_col_name</span>
<span class="p">)</span>
<span class="c1"># If the maximum is achieved in multiple locations, the first row position is returned.</span>
<span class="k">return</span> <span class="n">sdf</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
<span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">==</span> <span class="n">max_value</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">()[</span><span class="mi">0</span><span class="p">]</span></div>
<!--
  Highlighted source listing for Series.argmin (element id "Series.argmin").
  The "[docs]" anchor links back to this method's API reference page.
  Steps shown in the listing:
    1. Select the data column and the natural order column into sdf.
    2. Aggregate F.min of the data column and F.first of the natural order
       column, and read the single result row with head().
    3. Raise ValueError("attempt to get argmin of an empty sequence") when
       the second aggregate is None; return -1 when the minimum is None.
    4. Otherwise drop the natural order column, attach a distributed
       sequence column, filter rows equal to the minimum, and return
       element 0 of the first matching row.
  This block mirrors the Series.argmax listing, with F.min in place of F.max.
--><div class="viewcode-block" id="Series.argmin"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.argmin.html#pyspark.pandas.Series.argmin">[docs]</a> <span class="k">def</span> <span class="nf">argmin</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return int position of the smallest value in the Series.</span>
<span class="sd"> If the minimum is achieved in multiple locations,</span>
<span class="sd"> the first row position is returned.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> int</span>
<span class="sd"> Row position of the minimum value.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> Consider dataset containing cereal calories</span>
<span class="sd"> &gt;&gt;&gt; s = ps.Series({&#39;Corn Flakes&#39;: 100.0, &#39;Almond Delight&#39;: 110.0,</span>
<span class="sd"> ... &#39;Cinnamon Toast Crunch&#39;: 120.0, &#39;Cocoa Puff&#39;: 110.0})</span>
<span class="sd"> &gt;&gt;&gt; s # doctest: +SKIP</span>
<span class="sd"> Corn Flakes 100.0</span>
<span class="sd"> Almond Delight 110.0</span>
<span class="sd"> Cinnamon Toast Crunch 120.0</span>
<span class="sd"> Cocoa Puff 110.0</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; s.argmin() # doctest: +SKIP</span>
<span class="sd"> 0</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">,</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="n">min_value</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])),</span>
<span class="n">F</span><span class="o">.</span><span class="n">first</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">()</span>
<span class="k">if</span> <span class="n">min_value</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;attempt to get argmin of an empty sequence&quot;</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">min_value</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="o">-</span><span class="mi">1</span>
<span class="c1"># We should remember the natural sequence started from 0</span>
<span class="n">seq_col_name</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="s2">&quot;__distributed_sequence_column__&quot;</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="o">.</span><span class="n">attach_distributed_sequence_column</span><span class="p">(</span>
<span class="n">sdf</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">),</span> <span class="n">seq_col_name</span>
<span class="p">)</span>
<span class="c1"># If the minimum is achieved in multiple locations, the first row position is returned.</span>
<span class="k">return</span> <span class="n">sdf</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span>
<span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_column_names</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">==</span> <span class="n">min_value</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="p">)</span><span class="o">.</span><span class="n">head</span><span class="p">()[</span><span class="mi">0</span><span class="p">]</span></div>
<div class="viewcode-block" id="Series.compare"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.compare.html#pyspark.pandas.Series.compare">[docs]</a> <span class="k">def</span> <span class="nf">compare</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">:</span> <span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">keep_shape</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">keep_equal</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">DataFrame</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Compare to another Series and show the differences.</span>
<span class="sd"> .. note:: This API is slightly different from pandas when indexes from both Series</span>
<span class="sd"> are not identical and config &#39;compute.eager_check&#39; is False. pandas raises an exception;</span>
<span class="sd"> however, pandas-on-Spark just proceeds and performs by ignoring mismatches.</span>
<span class="sd"> &gt;&gt;&gt; psser1 = ps.Series([1, 2, 3, 4, 5], index=pd.Index([1, 2, 3, 4, 5]))</span>
<span class="sd"> &gt;&gt;&gt; psser2 = ps.Series([1, 2, 3, 4, 5], index=pd.Index([1, 2, 4, 3, 6]))</span>
<span class="sd"> &gt;&gt;&gt; psser1.compare(psser2) # doctest: +SKIP</span>
<span class="sd"> ...</span>
<span class="sd"> ValueError: Can only compare identically-labeled Series objects</span>
<span class="sd"> &gt;&gt;&gt; with ps.option_context(&quot;compute.eager_check&quot;, False):</span>
<span class="sd"> ... psser1.compare(psser2) # doctest: +SKIP</span>
<span class="sd"> ...</span>
<span class="sd"> self other</span>
<span class="sd"> 3 3.0 4.0</span>
<span class="sd"> 4 4.0 3.0</span>
<span class="sd"> 5 5.0 NaN</span>
<span class="sd"> 6 NaN 5.0</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : Series</span>
<span class="sd"> Object to compare with.</span>
<span class="sd"> keep_shape : bool, default False</span>
<span class="sd"> If true, all rows and columns are kept.</span>
<span class="sd"> Otherwise, only the ones with different values are kept.</span>
<span class="sd"> keep_equal : bool, default False</span>
<span class="sd"> If true, the result keeps values that are equal.</span>
<span class="sd"> Otherwise, equal values are shown as NaNs.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> DataFrame</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> Matching NaNs will not appear as a difference.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; from pyspark.pandas.config import set_option, reset_option</span>
<span class="sd"> &gt;&gt;&gt; set_option(&quot;compute.ops_on_diff_frames&quot;, True)</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;, &quot;d&quot;, &quot;e&quot;])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([&quot;a&quot;, &quot;a&quot;, &quot;c&quot;, &quot;b&quot;, &quot;e&quot;])</span>
<span class="sd"> Align the differences on columns</span>
<span class="sd"> &gt;&gt;&gt; s1.compare(s2).sort_index()</span>
<span class="sd"> self other</span>
<span class="sd"> 1 b a</span>
<span class="sd"> 3 d b</span>
<span class="sd"> Keep all original rows</span>
<span class="sd"> &gt;&gt;&gt; s1.compare(s2, keep_shape=True).sort_index()</span>
<span class="sd"> self other</span>
<span class="sd"> 0 None None</span>
<span class="sd"> 1 b a</span>
<span class="sd"> 2 None None</span>
<span class="sd"> 3 d b</span>
<span class="sd"> 4 None None</span>
<span class="sd"> Keep all original rows and also all original values</span>
<span class="sd"> &gt;&gt;&gt; s1.compare(s2, keep_shape=True, keep_equal=True).sort_index()</span>
<span class="sd"> self other</span>
<span class="sd"> 0 a a</span>
<span class="sd"> 1 b a</span>
<span class="sd"> 2 c c</span>
<span class="sd"> 3 d b</span>
<span class="sd"> 4 e e</span>
<span class="sd"> &gt;&gt;&gt; reset_option(&quot;compute.ops_on_diff_frames&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">combined</span><span class="p">:</span> <span class="n">DataFrame</span>
<span class="k">if</span> <span class="n">same_anchor</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">other</span><span class="p">):</span>
<span class="n">self_column_label</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">to_frame</span><span class="p">(),</span> <span class="s2">&quot;__self_column__&quot;</span><span class="p">)</span>
<span class="n">other_column_label</span> <span class="o">=</span> <span class="n">verify_temp_column_name</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">(),</span> <span class="s2">&quot;__other_column__&quot;</span><span class="p">)</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">DataFrame</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">with_new_columns</span><span class="p">(</span>
<span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">self_column_label</span><span class="p">),</span> <span class="n">other</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">other_column_label</span><span class="p">)]</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">get_option</span><span class="p">(</span><span class="s2">&quot;compute.eager_check&quot;</span><span class="p">)</span> <span class="ow">and</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">index</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Can only compare identically-labeled Series objects&quot;</span><span class="p">)</span>
<span class="n">combined</span> <span class="o">=</span> <span class="n">combine_frames</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">(),</span> <span class="n">other</span><span class="o">.</span><span class="n">to_frame</span><span class="p">())</span>
<span class="n">this_column_label</span> <span class="o">=</span> <span class="s2">&quot;self&quot;</span>
<span class="n">that_column_label</span> <span class="o">=</span> <span class="s2">&quot;other&quot;</span>
<span class="k">if</span> <span class="n">keep_equal</span> <span class="ow">and</span> <span class="n">keep_shape</span><span class="p">:</span>
<span class="n">combined</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Index</span><span class="p">([</span><span class="n">this_column_label</span><span class="p">,</span> <span class="n">that_column_label</span><span class="p">])</span>
<span class="k">return</span> <span class="n">combined</span>
<span class="n">this_data_scol</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">that_data_scol</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_spark_columns</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="n">index_scols</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_columns</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span>
<span class="k">if</span> <span class="n">keep_shape</span><span class="p">:</span>
<span class="n">this_scol</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">this_data_scol</span> <span class="o">==</span> <span class="n">that_data_scol</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">this_data_scol</span><span class="p">)</span>
<span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">this_column_label</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">this_field</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">this_column_label</span><span class="p">,</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">True</span>
<span class="p">)</span>
<span class="n">that_scol</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">this_data_scol</span> <span class="o">==</span> <span class="n">that_data_scol</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">that_data_scol</span><span class="p">)</span>
<span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">that_column_label</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">that_field</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="n">that_column_label</span><span class="p">,</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">True</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="o">~</span><span class="n">this_data_scol</span><span class="o">.</span><span class="n">eqNullSafe</span><span class="p">(</span><span class="n">that_data_scol</span><span class="p">))</span>
<span class="n">this_scol</span> <span class="o">=</span> <span class="n">this_data_scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">this_column_label</span><span class="p">)</span>
<span class="n">this_field</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">this_column_label</span><span class="p">)</span>
<span class="n">that_scol</span> <span class="o">=</span> <span class="n">that_data_scol</span><span class="o">.</span><span class="n">alias</span><span class="p">(</span><span class="n">that_column_label</span><span class="p">)</span>
<span class="n">that_field</span> <span class="o">=</span> <span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">data_fields</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="n">that_column_label</span><span class="p">)</span>
<span class="n">sdf</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="o">*</span><span class="n">index_scols</span><span class="p">,</span> <span class="n">this_scol</span><span class="p">,</span> <span class="n">that_scol</span><span class="p">,</span> <span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">)</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">InternalFrame</span><span class="p">(</span>
<span class="n">spark_frame</span><span class="o">=</span><span class="n">sdf</span><span class="p">,</span>
<span class="n">index_spark_columns</span><span class="o">=</span><span class="p">[</span>
<span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span> <span class="k">for</span> <span class="n">col</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_spark_column_names</span>
<span class="p">],</span>
<span class="n">index_names</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_names</span><span class="p">,</span>
<span class="n">index_fields</span><span class="o">=</span><span class="n">combined</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_fields</span><span class="p">,</span>
<span class="n">column_labels</span><span class="o">=</span><span class="p">[(</span><span class="n">this_column_label</span><span class="p">,),</span> <span class="p">(</span><span class="n">that_column_label</span><span class="p">,)],</span>
<span class="n">data_spark_columns</span><span class="o">=</span><span class="p">[</span><span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">this_column_label</span><span class="p">),</span> <span class="n">scol_for</span><span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">that_column_label</span><span class="p">)],</span>
<span class="n">data_fields</span><span class="o">=</span><span class="p">[</span><span class="n">this_field</span><span class="p">,</span> <span class="n">that_field</span><span class="p">],</span>
<span class="n">column_label_names</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">],</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">)</span></div>
<!-- Highlighted source listing for Series.align; the [docs] anchor links back to its API reference page. The "join" choices in the docstring are shown with single braces, matching the other parameter enumerations in this listing. --><div class="viewcode-block" id="Series.align"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.align.html#pyspark.pandas.Series.align">[docs]</a> <span class="k">def</span> <span class="nf">align</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">other</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">DataFrame</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">],</span>
<span class="n">join</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;outer&quot;</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">copy</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Tuple</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">Union</span><span class="p">[</span><span class="n">DataFrame</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Align two objects on their axes with the specified join method.</span>
<span class="sd"> Join method is specified for each axis Index.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> other : DataFrame or Series</span>
<span class="sd"> join : {&#39;outer&#39;, &#39;inner&#39;, &#39;left&#39;, &#39;right&#39;}, default &#39;outer&#39;</span>
<span class="sd"> axis : allowed axis of the other object, default None</span>
<span class="sd"> Align on index (0), columns (1), or both (None).</span>
<span class="sd"> copy : bool, default True</span>
<span class="sd"> Always returns new objects. If copy=False and no reindexing is</span>
<span class="sd"> required then original objects are returned.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> (left, right) : (Series, type of other)</span>
<span class="sd"> Aligned objects.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; ps.set_option(&quot;compute.ops_on_diff_frames&quot;, True)</span>
<span class="sd"> &gt;&gt;&gt; s1 = ps.Series([7, 8, 9], index=[10, 11, 12])</span>
<span class="sd"> &gt;&gt;&gt; s2 = ps.Series([&quot;g&quot;, &quot;h&quot;, &quot;i&quot;], index=[10, 20, 30])</span>
<span class="sd"> &gt;&gt;&gt; aligned_l, aligned_r = s1.align(s2)</span>
<span class="sd"> &gt;&gt;&gt; aligned_l.sort_index()</span>
<span class="sd"> 10 7.0</span>
<span class="sd"> 11 8.0</span>
<span class="sd"> 12 9.0</span>
<span class="sd"> 20 NaN</span>
<span class="sd"> 30 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; aligned_r.sort_index()</span>
<span class="sd"> 10 g</span>
<span class="sd"> 11 None</span>
<span class="sd"> 12 None</span>
<span class="sd"> 20 h</span>
<span class="sd"> 30 i</span>
<span class="sd"> dtype: object</span>
<span class="sd"> Align with the join type &quot;inner&quot;:</span>
<span class="sd"> &gt;&gt;&gt; aligned_l, aligned_r = s1.align(s2, join=&quot;inner&quot;)</span>
<span class="sd"> &gt;&gt;&gt; aligned_l.sort_index()</span>
<span class="sd"> 10 7</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; aligned_r.sort_index()</span>
<span class="sd"> 10 g</span>
<span class="sd"> dtype: object</span>
<span class="sd"> Align with a DataFrame:</span>
<span class="sd"> &gt;&gt;&gt; df = ps.DataFrame({&quot;a&quot;: [1, 2, 3], &quot;b&quot;: [&quot;a&quot;, &quot;b&quot;, &quot;c&quot;]}, index=[10, 20, 30])</span>
<span class="sd"> &gt;&gt;&gt; aligned_l, aligned_r = s1.align(df)</span>
<span class="sd"> &gt;&gt;&gt; aligned_l.sort_index()</span>
<span class="sd"> 10 7.0</span>
<span class="sd"> 11 8.0</span>
<span class="sd"> 12 9.0</span>
<span class="sd"> 20 NaN</span>
<span class="sd"> 30 NaN</span>
<span class="sd"> dtype: float64</span>
<span class="sd"> &gt;&gt;&gt; aligned_r.sort_index()</span>
<span class="sd"> a b</span>
<span class="sd"> 10 1.0 a</span>
<span class="sd"> 11 NaN None</span>
<span class="sd"> 12 NaN None</span>
<span class="sd"> 20 2.0 b</span>
<span class="sd"> 30 3.0 c</span>
<span class="sd"> &gt;&gt;&gt; ps.reset_option(&quot;compute.ops_on_diff_frames&quot;)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span>
<span class="k">if</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Series does not support columns axis.&quot;</span><span class="p">)</span>
<span class="n">self_df</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span>
<span class="n">left</span><span class="p">,</span> <span class="n">right</span> <span class="o">=</span> <span class="n">self_df</span><span class="o">.</span><span class="n">align</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">join</span><span class="o">=</span><span class="n">join</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="k">if</span> <span class="n">left</span> <span class="ow">is</span> <span class="n">self_df</span><span class="p">:</span>
<span class="n">left_ser</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">left_ser</span> <span class="o">=</span> <span class="n">first_series</span><span class="p">(</span><span class="n">left</span><span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">return</span> <span class="p">(</span><span class="n">left_ser</span><span class="o">.</span><span class="n">copy</span><span class="p">(),</span> <span class="n">right</span><span class="o">.</span><span class="n">copy</span><span class="p">())</span> <span class="k">if</span> <span class="n">copy</span> <span class="k">else</span> <span class="p">(</span><span class="n">left_ser</span><span class="p">,</span> <span class="n">right</span><span class="p">)</span></div>
<!-- Highlighted source listing for Series.between_time; the [docs] anchor links back to its API reference page. The shown implementation delegates to to_frame().between_time(...) and renames the first resulting series to self.name. --><div class="viewcode-block" id="Series.between_time"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.between_time.html#pyspark.pandas.Series.between_time">[docs]</a> <span class="k">def</span> <span class="nf">between_time</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">start_time</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">datetime</span><span class="o">.</span><span class="n">time</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span>
<span class="n">end_time</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">datetime</span><span class="o">.</span><span class="n">time</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span>
<span class="n">include_start</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">include_end</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Select values between particular times of the day (example: 9:00-9:30 AM).</span>
<span class="sd"> By setting ``start_time`` to be later than ``end_time``,</span>
<span class="sd"> you can get the times that are *not* between the two times.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> start_time : datetime.time or str</span>
<span class="sd"> Initial time as a time filter limit.</span>
<span class="sd"> end_time : datetime.time or str</span>
<span class="sd"> End time as a time filter limit.</span>
<span class="sd"> include_start : bool, default True</span>
<span class="sd"> Whether the start time needs to be included in the result.</span>
<span class="sd"> include_end : bool, default True</span>
<span class="sd"> Whether the end time needs to be included in the result.</span>
<span class="sd"> axis : {0 or &#39;index&#39;, 1 or &#39;columns&#39;}, default 0</span>
<span class="sd"> Determine range time on index or columns value.</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Data from the original object filtered to the specified dates range.</span>
<span class="sd"> Raises</span>
<span class="sd"> ------</span>
<span class="sd"> TypeError</span>
<span class="sd"> If the index is not a :class:`DatetimeIndex`</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> at_time : Select values at a particular time of the day.</span>
<span class="sd"> last : Select final periods of time series based on a date offset.</span>
<span class="sd"> DatetimeIndex.indexer_between_time : Get just the index locations for</span>
<span class="sd"> values between particular times of the day.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; idx = pd.date_range(&#39;2018-04-09&#39;, periods=4, freq=&#39;1D20min&#39;)</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([1, 2, 3, 4], index=idx)</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 2018-04-09 00:00:00 1</span>
<span class="sd"> 2018-04-10 00:20:00 2</span>
<span class="sd"> 2018-04-11 00:40:00 3</span>
<span class="sd"> 2018-04-12 01:00:00 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; psser.between_time(&#39;0:15&#39;, &#39;0:45&#39;)</span>
<span class="sd"> 2018-04-10 00:20:00 2</span>
<span class="sd"> 2018-04-11 00:40:00 3</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">between_time</span><span class="p">(</span><span class="n">start_time</span><span class="p">,</span> <span class="n">end_time</span><span class="p">,</span> <span class="n">include_start</span><span class="p">,</span> <span class="n">include_end</span><span class="p">,</span> <span class="n">axis</span><span class="p">)</span>
<span class="p">)</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<!-- Highlighted source listing for Series.at_time; the [docs] anchor links back to its API reference page. The shown implementation delegates to to_frame().at_time(time, asof, axis) and renames the first resulting series to self.name. NOTE(review): the docstring does not describe the "asof" parameter; confirm its semantics against DataFrame.at_time. --><div class="viewcode-block" id="Series.at_time"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.at_time.html#pyspark.pandas.Series.at_time">[docs]</a> <span class="k">def</span> <span class="nf">at_time</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">time</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">datetime</span><span class="o">.</span><span class="n">time</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span> <span class="n">asof</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span> <span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Select values at particular time of day (example: 9:30AM).</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> time : datetime.time or str</span>
<span class="sd"> axis : {0 or &#39;index&#39;, 1 or &#39;columns&#39;}, default 0</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> Series</span>
<span class="sd"> Raises</span>
<span class="sd"> ------</span>
<span class="sd"> TypeError</span>
<span class="sd"> If the index is not a :class:`DatetimeIndex`</span>
<span class="sd"> See Also</span>
<span class="sd"> --------</span>
<span class="sd"> between_time : Select values between particular times of the day.</span>
<span class="sd"> DatetimeIndex.indexer_at_time : Get just the index locations for</span>
<span class="sd"> values at particular time of the day.</span>
<span class="sd"> Examples</span>
<span class="sd"> --------</span>
<span class="sd"> &gt;&gt;&gt; idx = pd.date_range(&#39;2018-04-09&#39;, periods=4, freq=&#39;12H&#39;)</span>
<span class="sd"> &gt;&gt;&gt; psser = ps.Series([1, 2, 3, 4], index=idx)</span>
<span class="sd"> &gt;&gt;&gt; psser</span>
<span class="sd"> 2018-04-09 00:00:00 1</span>
<span class="sd"> 2018-04-09 12:00:00 2</span>
<span class="sd"> 2018-04-10 00:00:00 3</span>
<span class="sd"> 2018-04-10 12:00:00 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &gt;&gt;&gt; psser.at_time(&#39;12:00&#39;)</span>
<span class="sd"> 2018-04-09 12:00:00 2</span>
<span class="sd"> 2018-04-10 12:00:00 4</span>
<span class="sd"> dtype: int64</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span><span class="o">.</span><span class="n">at_time</span><span class="p">(</span><span class="n">time</span><span class="p">,</span> <span class="n">asof</span><span class="p">,</span> <span class="n">axis</span><span class="p">))</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">)</span></div>
<span class="k">def</span> <span class="nf">_cum</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">func</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="n">Column</span><span class="p">],</span> <span class="n">Column</span><span class="p">],</span>
<span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span><span class="p">,</span>
<span class="n">part_cols</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">(),</span>
<span class="n">ascending</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="c1"># This is used to cummin, cummax, cumsum, etc.</span>
<span class="k">if</span> <span class="n">ascending</span><span class="p">:</span>
<span class="n">window</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">asc</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">))</span>
<span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">window</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">Window</span><span class="o">.</span><span class="n">orderBy</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">desc</span><span class="p">(</span><span class="n">NATURAL_ORDER_COLUMN_NAME</span><span class="p">))</span>
<span class="o">.</span><span class="n">partitionBy</span><span class="p">(</span><span class="o">*</span><span class="n">part_cols</span><span class="p">)</span>
<span class="o">.</span><span class="n">rowsBetween</span><span class="p">(</span><span class="n">Window</span><span class="o">.</span><span class="n">unboundedPreceding</span><span class="p">,</span> <span class="n">Window</span><span class="o">.</span><span class="n">currentRow</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">skipna</span><span class="p">:</span>
<span class="c1"># There is a behavior difference between pandas and PySpark. In case of cummax,</span>
<span class="c1">#</span>
<span class="c1"># Input:</span>
<span class="c1"># A B</span>
<span class="c1"># 0 2.0 1.0</span>
<span class="c1"># 1 5.0 NaN</span>
<span class="c1"># 2 1.0 0.0</span>
<span class="c1"># 3 2.0 4.0</span>
<span class="c1"># 4 4.0 9.0</span>
<span class="c1">#</span>
<span class="c1"># pandas:</span>
<span class="c1"># A B</span>
<span class="c1"># 0 2.0 1.0</span>
<span class="c1"># 1 5.0 NaN</span>
<span class="c1"># 2 5.0 1.0</span>
<span class="c1"># 3 5.0 4.0</span>
<span class="c1"># 4 5.0 9.0</span>
<span class="c1">#</span>
<span class="c1"># PySpark:</span>
<span class="c1"># A B</span>
<span class="c1"># 0 2.0 1.0</span>
<span class="c1"># 1 5.0 1.0</span>
<span class="c1"># 2 5.0 1.0</span>
<span class="c1"># 3 5.0 4.0</span>
<span class="c1"># 4 5.0 9.0</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="c1"># Manually sets nulls given the column defined above.</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">(),</span>
<span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="c1"># Here, we use two Windows.</span>
<span class="c1"># One for real data.</span>
<span class="c1"># The other one for setting nulls after the first null it meets.</span>
<span class="c1">#</span>
<span class="c1"># There is a behavior difference between pandas and PySpark. In case of cummax,</span>
<span class="c1">#</span>
<span class="c1"># Input:</span>
<span class="c1"># A B</span>
<span class="c1"># 0 2.0 1.0</span>
<span class="c1"># 1 5.0 NaN</span>
<span class="c1"># 2 1.0 0.0</span>
<span class="c1"># 3 2.0 4.0</span>
<span class="c1"># 4 4.0 9.0</span>
<span class="c1">#</span>
<span class="c1"># pandas:</span>
<span class="c1"># A B</span>
<span class="c1"># 0 2.0 1.0</span>
<span class="c1"># 1 5.0 NaN</span>
<span class="c1"># 2 5.0 NaN</span>
<span class="c1"># 3 5.0 NaN</span>
<span class="c1"># 4 5.0 NaN</span>
<span class="c1">#</span>
<span class="c1"># PySpark:</span>
<span class="c1"># A B</span>
<span class="c1"># 0 2.0 1.0</span>
<span class="c1"># 1 5.0 1.0</span>
<span class="c1"># 2 5.0 1.0</span>
<span class="c1"># 3 5.0 4.0</span>
<span class="c1"># 4 5.0 9.0</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span>
<span class="c1"># By going through with max, it sets True after the first time it meets null.</span>
<span class="n">F</span><span class="o">.</span><span class="n">max</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">isNull</span><span class="p">())</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">),</span>
<span class="c1"># Manually sets nulls given the column defined above.</span>
<span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">None</span><span class="p">),</span>
<span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">func</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="p">)</span><span class="o">.</span><span class="n">over</span><span class="p">(</span><span class="n">window</span><span class="p">))</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<!--
  Series._cumsum (highlighted Python listing).
  A BooleanType column is cast to LongType first; any other non-NumericType column raises
  TypeError("Could not convert ... to numeric"). The cumulative sum is then delegated to
  _cum with F.sum, skipna and part_cols.
-->
<span class="k">def</span> <span class="nf">_cumsum</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span><span class="p">,</span> <span class="n">part_cols</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">())</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="n">psser</span> <span class="o">=</span> <span class="bp">self</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">):</span>
<span class="n">psser</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="k">lambda</span> <span class="n">scol</span><span class="p">:</span> <span class="n">scol</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">LongType</span><span class="p">()))</span>
<span class="k">elif</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">),</span>
<span class="n">psser</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">(),</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">psser</span><span class="o">.</span><span class="n">_cum</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">sum</span><span class="p">,</span> <span class="n">skipna</span><span class="p">,</span> <span class="n">part_cols</span><span class="p">)</span>
<!--
  Series._cumprod (highlighted Python listing).
  BooleanType: running F.min over coalesce(scol, True) via _cum, cast to LongType.
  NumericType: the product is rebuilt from three running aggregates computed via _cum:
  a count of zeros, a count of negatives (its parity gives the sign) and
  exp(sum(log(abs(x)))). The result is 0 once a zero has been counted, otherwise
  sign * abs_prod; IntegralType input is rounded and cast to LongType.
  Any other data type raises TypeError.
-->
<span class="k">def</span> <span class="nf">_cumprod</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">skipna</span><span class="p">:</span> <span class="nb">bool</span><span class="p">,</span> <span class="n">part_cols</span><span class="p">:</span> <span class="n">Sequence</span><span class="p">[</span><span class="s2">&quot;ColumnOrName&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">())</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">BooleanType</span><span class="p">):</span>
<span class="n">scol</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cum</span><span class="p">(</span>
<span class="k">lambda</span> <span class="n">scol</span><span class="p">:</span> <span class="n">F</span><span class="o">.</span><span class="n">min</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">coalesce</span><span class="p">(</span><span class="n">scol</span><span class="p">,</span> <span class="n">SF</span><span class="o">.</span><span class="n">lit</span><span class="p">(</span><span class="kc">True</span><span class="p">))),</span> <span class="n">skipna</span><span class="p">,</span> <span class="n">part_cols</span>
<span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">LongType</span><span class="p">())</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">NumericType</span><span class="p">):</span>
<span class="n">num_zeros</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cum</span><span class="p">(</span>
<span class="k">lambda</span> <span class="n">scol</span><span class="p">:</span> <span class="n">F</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">scol</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="mi">0</span><span class="p">)),</span> <span class="n">skipna</span><span class="p">,</span> <span class="n">part_cols</span>
<span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">num_negatives</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cum</span><span class="p">(</span>
<span class="k">lambda</span> <span class="n">scol</span><span class="p">:</span> <span class="n">F</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">scol</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="mi">0</span><span class="p">)),</span> <span class="n">skipna</span><span class="p">,</span> <span class="n">part_cols</span>
<span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="n">sign</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">num_negatives</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="n">abs_prod</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">exp</span><span class="p">(</span>
<span class="bp">self</span><span class="o">.</span><span class="n">_cum</span><span class="p">(</span><span class="k">lambda</span> <span class="n">scol</span><span class="p">:</span> <span class="n">F</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">F</span><span class="o">.</span><span class="n">abs</span><span class="p">(</span><span class="n">scol</span><span class="p">))),</span> <span class="n">skipna</span><span class="p">,</span> <span class="n">part_cols</span><span class="p">)</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">column</span>
<span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">num_zeros</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">otherwise</span><span class="p">(</span><span class="n">sign</span> <span class="o">*</span> <span class="n">abs_prod</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">IntegralType</span><span class="p">):</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">LongType</span><span class="p">())</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
<span class="s2">&quot;Could not convert </span><span class="si">{}</span><span class="s2"> (</span><span class="si">{}</span><span class="s2">) to numeric&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">spark_type_to_pandas_dtype</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">),</span>
<span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="o">.</span><span class="n">simpleString</span><span class="p">(),</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">scol</span><span class="p">)</span>
<!--
  Accessor attributes (highlighted Python listing): dt, str, cat and plot are each bound
  through CachedAccessor to DatetimeMethods, StringMethods, CategoricalAccessor and
  PandasOnSparkPlotAccessor respectively.
-->
<span class="c1"># ----------------------------------------------------------------------</span>
<span class="c1"># Accessor Methods</span>
<span class="c1"># ----------------------------------------------------------------------</span>
<span class="n">dt</span> <span class="o">=</span> <span class="n">CachedAccessor</span><span class="p">(</span><span class="s2">&quot;dt&quot;</span><span class="p">,</span> <span class="n">DatetimeMethods</span><span class="p">)</span>
<span class="nb">str</span> <span class="o">=</span> <span class="n">CachedAccessor</span><span class="p">(</span><span class="s2">&quot;str&quot;</span><span class="p">,</span> <span class="n">StringMethods</span><span class="p">)</span>
<span class="n">cat</span> <span class="o">=</span> <span class="n">CachedAccessor</span><span class="p">(</span><span class="s2">&quot;cat&quot;</span><span class="p">,</span> <span class="n">CategoricalAccessor</span><span class="p">)</span>
<span class="n">plot</span> <span class="o">=</span> <span class="n">CachedAccessor</span><span class="p">(</span><span class="s2">&quot;plot&quot;</span><span class="p">,</span> <span class="n">PandasOnSparkPlotAccessor</span><span class="p">)</span>
<span class="c1"># ----------------------------------------------------------------------</span>
<!--
  Series._apply_series_op (highlighted Python listing).
  Calls op(self); a result that is not a Series is wrapped with _with_new_scol.
  With should_resolve set, a new Series is built from _internal.resolved_copy via
  first_series(DataFrame(internal)); otherwise psser.copy() is returned.
-->
<span class="k">def</span> <span class="nf">_apply_series_op</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">op</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="s2">&quot;Series&quot;</span><span class="p">],</span> <span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">Column</span><span class="p">]],</span> <span class="n">should_resolve</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;Series&quot;</span><span class="p">:</span>
<span class="n">psser_or_scol</span> <span class="o">=</span> <span class="n">op</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">psser_or_scol</span><span class="p">,</span> <span class="n">Series</span><span class="p">):</span>
<span class="n">psser</span> <span class="o">=</span> <span class="n">psser_or_scol</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">psser</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_with_new_scol</span><span class="p">(</span><span class="n">psser_or_scol</span><span class="p">)</span>
<span class="k">if</span> <span class="n">should_resolve</span><span class="p">:</span>
<span class="n">internal</span> <span class="o">=</span> <span class="n">psser</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">resolved_copy</span>
<span class="k">return</span> <span class="n">first_series</span><span class="p">(</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">internal</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">psser</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<!--
  Series._reduce_for_stat_function (highlighted Python listing).
  Rejects axis 1 with NotImplementedError, evaluates sfun(self), and when the min_count
  keyword is greater than 0 keeps the value only where Frame._count_expr(self) is at least
  min_count. The selected scalar is unpacked and a None result is returned as np.nan.
-->
<span class="k">def</span> <span class="nf">_reduce_for_stat_function</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">sfun</span><span class="p">:</span> <span class="n">Callable</span><span class="p">[[</span><span class="s2">&quot;Series&quot;</span><span class="p">],</span> <span class="n">Column</span><span class="p">],</span>
<span class="n">name</span><span class="p">:</span> <span class="n">str_type</span><span class="p">,</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">Axis</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">numeric_only</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="o">**</span><span class="n">kwargs</span><span class="p">:</span> <span class="n">Any</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Scalar</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Applies sfun to the column and returns a scalar</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> sfun : the stats function to be used for aggregation</span>
<span class="sd"> name : original pandas API name.</span>
<span class="sd"> axis : used only for sanity check because series only support index axis.</span>
<span class="sd"> numeric_only : not used by this implementation, but passed down by stats functions</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">axis</span> <span class="o">=</span> <span class="n">validate_axis</span><span class="p">(</span><span class="n">axis</span><span class="p">)</span>
<span class="k">if</span> <span class="n">axis</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="s2">&quot;Series does not support columns axis.&quot;</span><span class="p">)</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">sfun</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="n">min_count</span> <span class="o">=</span> <span class="n">kwargs</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;min_count&quot;</span><span class="p">,</span> <span class="mi">0</span><span class="p">)</span>
<span class="k">if</span> <span class="n">min_count</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">scol</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">when</span><span class="p">(</span><span class="n">Frame</span><span class="o">.</span><span class="n">_count_expr</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">&gt;=</span> <span class="n">min_count</span><span class="p">,</span> <span class="n">scol</span><span class="p">)</span>
<span class="n">result</span> <span class="o">=</span> <span class="n">unpack_scalar</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">spark_frame</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">scol</span><span class="p">))</span>
<span class="k">return</span> <span class="n">result</span> <span class="k">if</span> <span class="n">result</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<!--
  Series.groupby viewcode block (element id "Series.groupby", with a [docs] back-link to the
  API reference page). The listed method forwards by, axis, as_index and dropna to
  super().groupby, passes the result through cast("SeriesGroupBy", ...), and afterwards
  assigns Frame.groupby.__doc__ to groupby.__doc__.
-->
<span class="c1"># Override the `groupby` to specify the actual return type annotation.</span>
<div class="viewcode-block" id="Series.groupby"><a class="viewcode-back" href="../../../reference/pyspark.pandas/api/pyspark.pandas.Series.groupby.html#pyspark.pandas.Series.groupby">[docs]</a> <span class="k">def</span> <span class="nf">groupby</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span>
<span class="n">by</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="n">Name</span><span class="p">,</span> <span class="s2">&quot;Series&quot;</span><span class="p">]]],</span>
<span class="n">axis</span><span class="p">:</span> <span class="n">Axis</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="n">as_index</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="n">dropna</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;SeriesGroupBy&quot;</span><span class="p">:</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span>
<span class="s2">&quot;SeriesGroupBy&quot;</span><span class="p">,</span> <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="n">by</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="n">axis</span><span class="p">,</span> <span class="n">as_index</span><span class="o">=</span><span class="n">as_index</span><span class="p">,</span> <span class="n">dropna</span><span class="o">=</span><span class="n">dropna</span><span class="p">)</span>
<span class="p">)</span></div>
<span class="n">groupby</span><span class="o">.</span><span class="vm">__doc__</span> <span class="o">=</span> <span class="n">Frame</span><span class="o">.</span><span class="n">groupby</span><span class="o">.</span><span class="vm">__doc__</span>
<!--
  Series._build_groupby (highlighted Python listing).
  Imports SeriesGroupBy from pyspark.pandas.groupby inside the method body and returns
  SeriesGroupBy._build(self, by, as_index=as_index, dropna=dropna).
-->
<span class="k">def</span> <span class="nf">_build_groupby</span><span class="p">(</span>
<span class="bp">self</span><span class="p">,</span> <span class="n">by</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">Union</span><span class="p">[</span><span class="s2">&quot;Series&quot;</span><span class="p">,</span> <span class="n">Label</span><span class="p">]],</span> <span class="n">as_index</span><span class="p">:</span> <span class="nb">bool</span><span class="p">,</span> <span class="n">dropna</span><span class="p">:</span> <span class="nb">bool</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="s2">&quot;SeriesGroupBy&quot;</span><span class="p">:</span>
<span class="kn">from</span> <span class="nn">pyspark.pandas.groupby</span> <span class="kn">import</span> <span class="n">SeriesGroupBy</span>
<span class="k">return</span> <span class="n">SeriesGroupBy</span><span class="o">.</span><span class="n">_build</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">by</span><span class="p">,</span> <span class="n">as_index</span><span class="o">=</span><span class="n">as_index</span><span class="p">,</span> <span class="n">dropna</span><span class="o">=</span><span class="n">dropna</span><span class="p">)</span>
<!--
  Series.__getitem__ (highlighted Python listing).
  Uses iloc when key is a slice whose start or stop is exactly of type int, or when key is
  exactly of type int and the index data type is neither IntegerType nor LongType;
  every other key goes through loc. SparkPandasIndexingError is converted to a KeyError
  that reports len(key) and the index level count.
-->
<span class="k">def</span> <span class="fm">__getitem__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">if</span> <span class="p">(</span><span class="nb">isinstance</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="nb">slice</span><span class="p">)</span> <span class="ow">and</span> <span class="nb">any</span><span class="p">(</span><span class="nb">type</span><span class="p">(</span><span class="n">n</span><span class="p">)</span> <span class="o">==</span> <span class="nb">int</span> <span class="k">for</span> <span class="n">n</span> <span class="ow">in</span> <span class="p">[</span><span class="n">key</span><span class="o">.</span><span class="n">start</span><span class="p">,</span> <span class="n">key</span><span class="o">.</span><span class="n">stop</span><span class="p">]))</span> <span class="ow">or</span> <span class="p">(</span>
<span class="nb">type</span><span class="p">(</span><span class="n">key</span><span class="p">)</span> <span class="o">==</span> <span class="nb">int</span>
<span class="ow">and</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="p">(</span><span class="n">IntegerType</span><span class="p">,</span> <span class="n">LongType</span><span class="p">))</span>
<span class="p">):</span>
<span class="c1"># Seems like pandas Series always uses int as positional search when slicing</span>
<span class="c1"># with ints, searches based on index values when the value is int.</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">key</span><span class="p">]</span>
<span class="k">except</span> <span class="n">SparkPandasIndexingError</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span>
<span class="s2">&quot;Key length (</span><span class="si">{}</span><span class="s2">) exceeds index depth (</span><span class="si">{}</span><span class="s2">)&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="nb">len</span><span class="p">(</span><span class="n">key</span><span class="p">),</span> <span class="bp">self</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">index_level</span>
<span class="p">)</span>
<span class="p">)</span>
<!--
  Series.__getattr__ (highlighted Python listing).
  Names starting with two underscores raise AttributeError immediately. Names found on
  MissingPandasLikeSeries are dispatched: a property has its fget called with self, anything
  else is returned as partial(property_or_func, self). All remaining names raise
  AttributeError with the "'Series' object has no attribute" message.
-->
<span class="k">def</span> <span class="fm">__getattr__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="n">str_type</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="k">if</span> <span class="n">item</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">&quot;__&quot;</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">MissingPandasLikeSeries</span><span class="p">,</span> <span class="n">item</span><span class="p">):</span>
<span class="n">property_or_func</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="n">MissingPandasLikeSeries</span><span class="p">,</span> <span class="n">item</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="nb">property</span><span class="p">):</span>
<span class="k">return</span> <span class="n">property_or_func</span><span class="o">.</span><span class="n">fget</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">partial</span><span class="p">(</span><span class="n">property_or_func</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span>
<span class="k">raise</span> <span class="ne">AttributeError</span><span class="p">(</span><span class="s2">&quot;&#39;Series&#39; object has no attribute &#39;</span><span class="si">{}</span><span class="s2">&#39;&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">item</span><span class="p">))</span>
<!--
  Series._to_internal_pandas (highlighted Python listing).
  Returns self._psdf._internal.to_pandas_frame[self.name], i.e. the column named after this
  Series taken from the parent frame's pandas representation.
-->
<span class="k">def</span> <span class="nf">_to_internal_pandas</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Return a pandas Series directly from _internal to avoid overhead of copy.</span>
<span class="sd"> This method is for internal use only.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">to_pandas_frame</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]</span>
<span class="k">def</span> <span class="fm">__repr__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">str_type</span><span class="p">:</span>
<span class="n">max_display_count</span> <span class="o">=</span> <span class="n">get_option</span><span class="p">(</span><span class="s2">&quot;display.max_rows&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">max_display_count</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">_to_internal_pandas</span><span class="p">()</span><span class="o">.</span><span class="n">to_string</span><span class="p">(</span>
<span class="n">name</span><span class="o">=</span><span class="nb">bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">),</span> <span class="n">dtype</span><span class="o">=</span><span class="nb">bool</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">pser</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">_psdf</span><span class="o">.</span><span class="n">_get_or_create_repr_pandas_cache</span><span class="p">(</span><span class="n">max_display_count</span><span class="p">)[</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">]</span>
<span class="n">pser_length</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">pser</span><span class="p">)</span>
<span class="n">pser</span> <span class="o">=</span> <span class="n">pser</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:</span><span class="n">max_display_count</span><span class="p">]</span>
<span class="k">if</span> <span class="n">pser_length</span> <span class="o">&gt;</span> <span class="n">max_display_count</span><span class="p">:</span>
<span class="n">repr_string</span> <span class="o">=</span> <span class="n">pser</span><span class="o">.</span><span class="n">to_string</span><span class="p">(</span><span class="n">length</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">rest</span><span class="p">,</span> <span class="n">prev_footer</span> <span class="o">=</span> <span class="n">repr_string</span><span class="o">.</span><span class="n">rsplit</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
<span class="n">match</span> <span class="o">=</span> <span class="n">REPR_PATTERN</span><span class="o">.</span><span class="n">search</span><span class="p">(</span><span class="n">prev_footer</span><span class="p">)</span>
<span class="k">if</span> <span class="n">match</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">length</span> <span class="o">=</span> <span class="n">match</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="s2">&quot;length&quot;</span><span class="p">)</span>
<span class="n">dtype_name</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">footer</span> <span class="o">=</span> <span class="s2">&quot;</span><span class="se">\n</span><span class="s2">dtype: </span><span class="si">{dtype}</span><span class="se">\n</span><span class="s2">Showing only the first </span><span class="si">{length}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">length</span><span class="o">=</span><span class="n">length</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">pprint_thing</span><span class="p">(</span><span class="n">dtype_name</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">footer</span> <span class="o">=</span> <span class="p">(</span>
<span class="s2">&quot;</span><span class="se">\n</span><span class="s2">Name: </span><span class="si">{name}</span><span class="s2">, dtype: </span><span class="si">{dtype}</span><span class="s2">&quot;</span>
<span class="s2">&quot;</span><span class="se">\n</span><span class="s2">Showing only the first </span><span class="si">{length}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">length</span><span class="o">=</span><span class="n">length</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="n">pprint_thing</span><span class="p">(</span><span class="n">dtype_name</span><span class="p">)</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">rest</span> <span class="o">+</span> <span class="n">footer</span>
<span class="k">return</span> <span class="n">pser</span><span class="o">.</span><span class="n">to_string</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">dtype</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">dtype</span><span class="p">)</span>
<span class="k">def</span> <span class="fm">__dir__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Iterable</span><span class="p">[</span><span class="n">str_type</span><span class="p">]:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="p">,</span> <span class="n">StructType</span><span class="p">):</span>
<span class="n">fields</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">fields</span> <span class="o">=</span> <span class="p">[</span><span class="n">f</span> <span class="k">for</span> <span class="n">f</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">spark</span><span class="o">.</span><span class="n">data_type</span><span class="o">.</span><span class="n">fieldNames</span><span class="p">()</span> <span class="k">if</span> <span class="s2">&quot; &quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">f</span><span class="p">]</span>
<span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__dir__</span><span class="p">())</span> <span class="o">+</span> <span class="n">fields</span>
<span class="k">def</span> <span class="fm">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="n">MissingPandasLikeSeries</span><span class="o">.</span><span class="fm">__iter__</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span>
<span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">version_info</span> <span class="o">&gt;=</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">7</span><span class="p">):</span>
<span class="c1"># In order to support the type hints such as Series[...]. See DataFrame.__class_getitem__.</span>
<span class="k">def</span> <span class="nf">__class_getitem__</span><span class="p">(</span><span class="bp">cls</span><span class="p">,</span> <span class="n">params</span><span class="p">:</span> <span class="n">Any</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Type</span><span class="p">[</span><span class="n">SeriesType</span><span class="p">]:</span>
<span class="k">return</span> <span class="n">create_type_for_series_type</span><span class="p">(</span><span class="n">params</span><span class="p">)</span>
<span class="k">elif</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span> <span class="o">&lt;=</span> <span class="n">sys</span><span class="o">.</span><span class="n">version_info</span> <span class="o">&lt;</span> <span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">7</span><span class="p">):</span>
<span class="c1"># The implementation is in its metaclass so this flag is needed to distinguish</span>
<span class="c1"># pandas-on-Spark Series.</span>
<span class="n">is_series</span> <span class="o">=</span> <span class="kc">None</span></div>
<span class="k">def</span> <span class="nf">unpack_scalar</span><span class="p">(</span><span class="n">sdf</span><span class="p">:</span> <span class="n">SparkDataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Any</span><span class="p">:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Takes a dataframe that is supposed to contain a single row with a single scalar value,</span>
<span class="sd"> and returns this value.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">lst</span> <span class="o">=</span> <span class="n">sdf</span><span class="o">.</span><span class="n">limit</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span><span class="o">.</span><span class="n">toPandas</span><span class="p">()</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">lst</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">,</span> <span class="p">(</span><span class="n">sdf</span><span class="p">,</span> <span class="n">lst</span><span class="p">)</span>
<span class="n">row</span> <span class="o">=</span> <span class="n">lst</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">lst2</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">row</span><span class="p">)</span>
<span class="k">assert</span> <span class="nb">len</span><span class="p">(</span><span class="n">lst2</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span><span class="p">,</span> <span class="p">(</span><span class="n">row</span><span class="p">,</span> <span class="n">lst2</span><span class="p">)</span>
<span class="k">return</span> <span class="n">lst2</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">first_series</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Series</span><span class="p">:</span>
<span class="o">...</span>
<span class="nd">@overload</span>
<span class="k">def</span> <span class="nf">first_series</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">:</span>
<span class="o">...</span>
<span class="k">def</span> <span class="nf">first_series</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Union</span><span class="p">[</span><span class="n">Series</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">]:</span>
<span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Takes a DataFrame and returns the first column of the DataFrame as a Series</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">assert</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="p">(</span><span class="n">DataFrame</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">)),</span> <span class="nb">type</span><span class="p">(</span><span class="n">df</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">DataFrame</span><span class="p">):</span>
<span class="k">return</span> <span class="n">df</span><span class="o">.</span><span class="n">_psser_for</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">_internal</span><span class="o">.</span><span class="n">column_labels</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">columns</span><span class="p">[</span><span class="mi">0</span><span class="p">]]</span>
<span class="k">def</span> <span class="nf">_test</span><span class="p">()</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">import</span> <span class="nn">doctest</span>
<span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">from</span> <span class="nn">pyspark.sql</span> <span class="kn">import</span> <span class="n">SparkSession</span>
<span class="kn">import</span> <span class="nn">pyspark.pandas.series</span>
<span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">environ</span><span class="p">[</span><span class="s2">&quot;SPARK_HOME&quot;</span><span class="p">])</span>
<span class="n">globs</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span><span class="o">.</span><span class="n">series</span><span class="o">.</span><span class="vm">__dict__</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">globs</span><span class="p">[</span><span class="s2">&quot;ps&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span>
<span class="n">spark</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">SparkSession</span><span class="o">.</span><span class="n">builder</span><span class="o">.</span><span class="n">master</span><span class="p">(</span><span class="s2">&quot;local[4]&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">appName</span><span class="p">(</span><span class="s2">&quot;pyspark.pandas.series tests&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">getOrCreate</span><span class="p">()</span>
<span class="p">)</span>
<span class="p">(</span><span class="n">failure_count</span><span class="p">,</span> <span class="n">test_count</span><span class="p">)</span> <span class="o">=</span> <span class="n">doctest</span><span class="o">.</span><span class="n">testmod</span><span class="p">(</span>
<span class="n">pyspark</span><span class="o">.</span><span class="n">pandas</span><span class="o">.</span><span class="n">series</span><span class="p">,</span>
<span class="n">globs</span><span class="o">=</span><span class="n">globs</span><span class="p">,</span>
<span class="n">optionflags</span><span class="o">=</span><span class="n">doctest</span><span class="o">.</span><span class="n">ELLIPSIS</span> <span class="o">|</span> <span class="n">doctest</span><span class="o">.</span><span class="n">NORMALIZE_WHITESPACE</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">spark</span><span class="o">.</span><span class="n">stop</span><span class="p">()</span>
<span class="k">if</span> <span class="n">failure_count</span><span class="p">:</span>
<span class="n">sys</span><span class="o">.</span><span class="n">exit</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">&quot;__main__&quot;</span><span class="p">:</span>
<span class="n">_test</span><span class="p">()</span>
</pre></div>
</div>
<div class='prev-next-bottom'>
</div>
</main>
</div>
</div>
<script src="../../../_static/js/index.3da636dd464baa7582d2.js"></script>
<footer class="footer mt-5 mt-md-0">
<div class="container">
<p>
&copy; Copyright The Apache Software Foundation.<br/>
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 3.0.4.<br/>
</p>
</div>
</footer>
</body>
</html>