<!-- blob: bd57a22cd660be89a578f03a89fe63630c38f6be [file] [log] [blame] -->
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>apache_beam.dataframe.frames module &mdash; Apache Beam 2.36.0 documentation</title>
<script type="text/javascript" src="_static/js/modernizr.min.js"></script>
<script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
<script type="text/javascript" src="_static/jquery.js"></script>
<script type="text/javascript" src="_static/underscore.js"></script>
<script type="text/javascript" src="_static/doctools.js"></script>
<script type="text/javascript" src="_static/language_data.js"></script>
<script async="async" type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="apache_beam.dataframe.io module" href="apache_beam.dataframe.io.html" />
<link rel="prev" title="apache_beam.dataframe.frame_base module" href="apache_beam.dataframe.frame_base.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html" class="icon icon-home"> Apache Beam
</a>
<div class="version">
2.36.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="apache_beam.coders.html">apache_beam.coders package</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="apache_beam.dataframe.html">apache_beam.dataframe package</a><ul class="current">
<li class="toctree-l2 current"><a class="reference internal" href="apache_beam.dataframe.html#submodules">Submodules</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.convert.html">apache_beam.dataframe.convert module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.doctests.html">apache_beam.dataframe.doctests module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.expressions.html">apache_beam.dataframe.expressions module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.frame_base.html">apache_beam.dataframe.frame_base module</a></li>
<li class="toctree-l3 current"><a class="current reference internal" href="#">apache_beam.dataframe.frames module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.io.html">apache_beam.dataframe.io module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.pandas_top_level_functions.html">apache_beam.dataframe.pandas_top_level_functions module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.partitionings.html">apache_beam.dataframe.partitionings module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.schemas.html">apache_beam.dataframe.schemas module</a></li>
<li class="toctree-l3"><a class="reference internal" href="apache_beam.dataframe.transforms.html">apache_beam.dataframe.transforms module</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.io.html">apache_beam.io package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.metrics.html">apache_beam.metrics package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.ml.html">apache_beam.ml package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.options.html">apache_beam.options package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.portability.html">apache_beam.portability package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.runners.html">apache_beam.runners package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.transforms.html">apache_beam.transforms package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.typehints.html">apache_beam.typehints package</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.utils.html">apache_beam.utils package</a></li>
</ul>
<ul>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.error.html">apache_beam.error module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pipeline.html">apache_beam.pipeline module</a></li>
<li class="toctree-l1"><a class="reference internal" href="apache_beam.pvalue.html">apache_beam.pvalue module</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" aria-label="top navigation">
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Apache Beam</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html">Docs</a> &raquo;</li>
<li><a href="apache_beam.dataframe.html">apache_beam.dataframe package</a> &raquo;</li>
<li>apache_beam.dataframe.frames module</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/apache_beam.dataframe.frames.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="module-apache_beam.dataframe.frames">
<span id="apache-beam-dataframe-frames-module"></span><h1>apache_beam.dataframe.frames module<a class="headerlink" href="#module-apache_beam.dataframe.frames" title="Permalink to this headline"></a></h1>
<p>Analogs for <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.html#pandas.DataFrame" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-class docutils literal notranslate"><span class="pre">pandas.DataFrame</span></code></a> and <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.html#pandas.Series" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-class docutils literal notranslate"><span class="pre">pandas.Series</span></code></a>:
<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code></a> and <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredSeries</span></code></a>.</p>
<p>These classes are effectively wrappers around a <a class="reference external" href="https://beam.apache.org/documentation/programming-guide/#what-is-a-schema">schema-aware</a>
<a class="reference internal" href="apache_beam.pvalue.html#apache_beam.pvalue.PCollection" title="apache_beam.pvalue.PCollection"><code class="xref py py-class docutils literal notranslate"><span class="pre">PCollection</span></code></a> that provide a set of operations
compatible with the <a class="reference external" href="https://pandas.pydata.org/">pandas</a> API.</p>
<p>Note that we aim for the Beam DataFrame API to be completely compatible with
the pandas API, but there are some features that are currently unimplemented
for various reasons. Pay particular attention to the <strong>‘Differences from
pandas’</strong> section for each operation to understand where we diverge.</p>
<dl class="class">
<dt id="apache_beam.dataframe.frames.DeferredSeries">
<em class="property">class </em><code class="descclassname">apache_beam.dataframe.frames.</code><code class="descname">DeferredSeries</code><span class="sig-paren">(</span><em>expr</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.dataframe.frames.DeferredDataFrameOrSeries</span></code></p>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.name">
<code class="descname">name</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.name" title="Permalink to this definition"></a></dt>
<dd><p>Return the name of the Series.</p>
<p>The name of a Series becomes its index or column name if it is used
to form a DataFrame. It is also used whenever displaying the Series
using the interpreter.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">The name of the DeferredSeries, also the column name if part of a DeferredDataFrame.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">label (hashable object)</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rename" title="apache_beam.dataframe.frames.DeferredSeries.rename"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredSeries.rename</span></code></a></dt>
<dd>Sets the DeferredSeries name when given a scalar input.</dd>
<dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">Index.name</span></code></dt>
<dd>Corresponding Index property.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">The Series name can be set initially when calling the constructor.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">int64</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">&#39;Numbers&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">2 3</span>
<span class="go">Name: Numbers, dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="s2">&quot;Integers&quot;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">2 3</span>
<span class="go">Name: Integers, dtype: int64</span>
<span class="go">The name of a Series within a DataFrame is its column name.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;Odd Numbers&quot;</span><span class="p">,</span> <span class="s2">&quot;Even Numbers&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go">   Odd Numbers  Even Numbers</span>
<span class="go">0            1             2</span>
<span class="go">1            3             4</span>
<span class="go">2            5             6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="p">[</span><span class="s2">&quot;Even Numbers&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">name</span>
<span class="go">&#39;Even Numbers&#39;</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.hasnans">
<code class="descname">hasnans</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.hasnans" title="Permalink to this definition"></a></dt>
<dd><p>Return True if there are any NaNs; enables various performance speedups.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.dtype">
<code class="descname">dtype</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.dtype" title="Permalink to this definition"></a></dt>
<dd><p>Return the dtype object of the underlying data.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.dtypes">
<code class="descname">dtypes</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.dtypes" title="Permalink to this definition"></a></dt>
<dd><p>Return the dtype object of the underlying data.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.keys">
<code class="descname">keys</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.keys"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.keys" title="Permalink to this definition"></a></dt>
<dd><p>Return alias for index.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Index of the DeferredSeries.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Index.html#pandas.Index" title="(in pandas v1.5.0.dev0+279.g7651c08230)">Index</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.T">
<code class="descname">T</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.T" title="Permalink to this definition"></a></dt>
<dd><p>Return the transpose, which is by definition self.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.transpose">
<code class="descname">transpose</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.transpose" title="Permalink to this definition"></a></dt>
<dd><p>Return the transpose, which is by definition self.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
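<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">The DeferredSeries itself; the transpose of a one-dimensional object is by definition the same object.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></td>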
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.shape">
<code class="descname">shape</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.shape" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-attr docutils literal notranslate"><span class="pre">pandas.Series.shape</span></code> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.append">
<code class="descname">append</code><span class="sig-paren">(</span><em>to_append</em>, <em>ignore_index</em>, <em>verify_integrity</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.append"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.append" title="Permalink to this definition"></a></dt>
<dd><p>Concatenate two or more Series.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>to_append</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>list/tuple of DeferredSeries</em>) – DeferredSeries to append with self.</li>
<li><strong>ignore_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, the resulting axis will be labeled 0, 1, …, n - 1.</li>
<li><strong>verify_integrity</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, raise Exception on creating index with duplicates.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Concatenated DeferredSeries.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">ignore_index=True</span></code> is not supported, because it requires generating an
order-sensitive index.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">concat()</span></code></dt>
<dd>General function to concatenate DeferredDataFrame or DeferredSeries objects.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Iteratively appending to a DeferredSeries can be more computationally intensive
than a single concatenate. A better solution is to append values to a
list and then concatenate the list with the original DeferredSeries all at
once.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s3</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">s2</span><span class="p">)</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">2 3</span>
<span class="go">0 4</span>
<span class="go">1 5</span>
<span class="go">2 6</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">s3</span><span class="p">)</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">2 3</span>
<span class="go">3 4</span>
<span class="go">4 5</span>
<span class="go">5 6</span>
<span class="go">dtype: int64</span>
<span class="go">With `ignore_index` set to True:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">s2</span><span class="p">,</span> <span class="n">ignore_index</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">2 3</span>
<span class="go">3 4</span>
<span class="go">4 5</span>
<span class="go">5 6</span>
<span class="go">dtype: int64</span>
<span class="go">With `verify_integrity` set to True:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">s2</span><span class="p">,</span> <span class="n">verify_integrity</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gt">Traceback (most recent call last):</span>
<span class="c">...</span>
<span class="gr">ValueError</span>: <span class="n">Indexes have overlapping values: [0, 1, 2]</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.align">
<code class="descname">align</code><span class="sig-paren">(</span><em>other</em>, <em>join</em>, <em>axis</em>, <em>level</em>, <em>method</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.align"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.align" title="Permalink to this definition"></a></dt>
<dd><p>Align two objects on their axes with the specified join method.</p>
<p>Join method is specified for each axis Index.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a><em> or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a>) – </li>
<li><strong>join</strong> (<em>{'outer'</em><em>, </em><em>'inner'</em><em>, </em><em>'left'</em><em>, </em><em>'right'}</em><em>, </em><em>default 'outer'</em>) – </li>
<li><strong>axis</strong> (<em>allowed axis of the other object</em><em>, </em><em>default None</em>) – Align on index (0), columns (1), or both (None).</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Always returns new objects. If copy=False and no reindexing is
required then original objects are returned.</li>
<li><strong>fill_value</strong> (<em>scalar</em><em>, </em><em>default np.NaN</em>) – Value to use for missing values. Defaults to NaN, but can be any
“compatible” value.</li>
<li><strong>method</strong> (<em>{'backfill'</em><em>, </em><em>'bfill'</em><em>, </em><em>'pad'</em><em>, </em><em>'ffill'</em><em>, </em><em>None}</em><em>, </em><em>default None</em>) – <p>Method to use for filling holes in reindexed DeferredSeries:</p>
<ul>
<li>pad / ffill: propagate last valid observation forward to next valid.</li>
<li>backfill / bfill: use NEXT valid observation to fill gap.</li>
</ul>
</li>
<li><strong>limit</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default None</em>) – If method is specified, this is the maximum number of consecutive
NaN values to forward/backward fill. In other words, if there is
a gap with more than this number of consecutive NaNs, it will only
be partially filled. If method is not specified, this is the
maximum number of entries along the entire axis where NaNs will be
filled. Must be greater than 0 if not None.</li>
<li><strong>fill_axis</strong> (<em>{0</em><em> or </em><em>'index'}</em><em>, </em><em>default 0</em>) – Filling axis, method and limit.</li>
<li><strong>broadcast_axis</strong> (<em>{0</em><em> or </em><em>'index'}</em><em>, </em><em>default None</em>) – Broadcast values along this axis, if aligning two objects of
different dimensions.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><strong>(left, right)</strong> – Aligned objects.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">(<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a>, type of other)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Aligning per-level is not yet supported. Only the default,
<code class="docutils literal notranslate"><span class="pre">level=None</span></code>, is allowed.</p>
<p>Filling NaN values via <code class="docutils literal notranslate"><span class="pre">method</span></code> is not supported, because it is
<a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">order-sensitive</a>.
Only the default, <code class="docutils literal notranslate"><span class="pre">method=None</span></code>, is allowed.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.argsort">
<code class="descname">argsort</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.argsort" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.argsort.html#pandas.Series.argsort" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.argsort()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.array">
<code class="descname">array</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.array" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-attr docutils literal notranslate"><span class="pre">pandas.Series.array</span></code> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.get">
<code class="descname">get</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.get" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.get.html#pandas.Series.get" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.get()</span></code></a> is not yet supported in the Beam DataFrame API because the columns in the output DataFrame depend on the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-columns">https://s.apache.org/dataframe-non-deferred-columns</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.ravel">
<code class="descname">ravel</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.ravel" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.ravel.html#pandas.Series.ravel" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.ravel()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.slice_shift">
<code class="descname">slice_shift</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.slice_shift" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.slice_shift.html#pandas.Series.slice_shift" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.slice_shift()</span></code></a> is not yet supported in the Beam DataFrame API because it is deprecated in pandas.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.tshift">
<code class="descname">tshift</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.tshift" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.tshift.html#pandas.Series.tshift" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.tshift()</span></code></a> is not yet supported in the Beam DataFrame API because it is deprecated in pandas.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.rename">
<code class="descname">rename</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.rename" title="Permalink to this definition"></a></dt>
<dd><p>Alter Series index labels or name.</p>
<p>Function / dict values must be unique (1-to-1). Labels not contained in
a dict / Series will be left as-is. Extra labels listed don’t throw an
error.</p>
<p>Alternatively, change <code class="docutils literal notranslate"><span class="pre">Series.name</span></code> with a scalar value.</p>
<p>See the <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/basics.html#basics-rename" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">user guide</span></a> for more.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>&quot;index&quot;}</em>) – Unused. Accepted for compatibility with DeferredDataFrame method only.</li>
<li><strong>index</strong> (<em>scalar</em><em>, </em><em>hashable sequence</em><em>, </em><em>dict-like</em><em> or </em><em>function</em><em>, </em><em>optional</em>) – Functions or dict-like are transformations to apply to
the index.
Scalar or hashable sequence-like will alter the <code class="docutils literal notranslate"><span class="pre">DeferredSeries.name</span></code>
attribute.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments passed to the function. Only the
“inplace” keyword is used.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredSeries with index labels or name altered or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.rename" title="apache_beam.dataframe.frames.DeferredDataFrame.rename"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.rename()</span></code></a></dt>
<dd>Corresponding DeferredDataFrame method.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rename_axis" title="apache_beam.dataframe.frames.DeferredSeries.rename_axis"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rename_axis()</span></code></a></dt>
<dd>Set the name of the axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">2 3</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="s2">&quot;my_name&quot;</span><span class="p">)</span> <span class="c1"># scalar, changes Series.name</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">2 3</span>
<span class="go">Name: my_name, dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span> <span class="o">**</span> <span class="mi">2</span><span class="p">)</span> <span class="c1"># function, changes labels</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">4 3</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">rename</span><span class="p">({</span><span class="mi">1</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="mi">5</span><span class="p">})</span> <span class="c1"># mapping, changes labels</span>
<span class="go">0 1</span>
<span class="go">3 2</span>
<span class="go">5 3</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.between">
<code class="descname">between</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.between" title="Permalink to this definition"></a></dt>
<dd><p>Return boolean Series equivalent to left &lt;= series &lt;= right.</p>
<p>This function returns a boolean vector containing <cite>True</cite> wherever the
corresponding Series element is between the boundary values <cite>left</cite> and
<cite>right</cite>. NA values are treated as <cite>False</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>left</strong> (<em>scalar</em><em> or </em><em>list-like</em>) – Left boundary.</li>
<li><strong>right</strong> (<em>scalar</em><em> or </em><em>list-like</em>) – Right boundary.</li>
<li><strong>inclusive</strong> (<em>{&quot;both&quot;</em><em>, </em><em>&quot;neither&quot;</em><em>, </em><em>&quot;left&quot;</em><em>, </em><em>&quot;right&quot;}</em>) – <p>Include boundaries. Whether to set each bound as closed or open.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.3.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredSeries representing whether each element is between left and
right (inclusive).</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.gt" title="apache_beam.dataframe.frames.DeferredSeries.gt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.gt()</span></code></a></dt>
<dd>Greater than of series and other.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.lt" title="apache_beam.dataframe.frames.DeferredSeries.lt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.lt()</span></code></a></dt>
<dd>Less than of series and other.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>This function is equivalent to <code class="docutils literal notranslate"><span class="pre">(left</span> <span class="pre">&lt;=</span> <span class="pre">ser)</span> <span class="pre">&amp;</span> <span class="pre">(ser</span> <span class="pre">&lt;=</span> <span class="pre">right)</span></code>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span>
<span class="go">Boundary values are included by default:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">between</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span>
<span class="go">0 True</span>
<span class="go">1 False</span>
<span class="go">2 True</span>
<span class="go">3 False</span>
<span class="go">4 False</span>
<span class="go">dtype: bool</span>
<span class="go">With `inclusive` set to ``&quot;neither&quot;`` boundary values are excluded:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">between</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">inclusive</span><span class="o">=</span><span class="s2">&quot;neither&quot;</span><span class="p">)</span>
<span class="go">0 True</span>
<span class="go">1 False</span>
<span class="go">2 False</span>
<span class="go">3 False</span>
<span class="go">4 False</span>
<span class="go">dtype: bool</span>
<span class="go">`left` and `right` can be any scalar value:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s1">&#39;Alice&#39;</span><span class="p">,</span> <span class="s1">&#39;Bob&#39;</span><span class="p">,</span> <span class="s1">&#39;Carol&#39;</span><span class="p">,</span> <span class="s1">&#39;Eve&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">between</span><span class="p">(</span><span class="s1">&#39;Anna&#39;</span><span class="p">,</span> <span class="s1">&#39;Daniel&#39;</span><span class="p">)</span>
<span class="go">0 False</span>
<span class="go">1 True</span>
<span class="go">2 True</span>
<span class="go">3 False</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.add_suffix">
<code class="descname">add_suffix</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.add_suffix" title="Permalink to this definition"></a></dt>
<dd><p>Suffix labels with string <cite>suffix</cite>.</p>
<p>For DeferredSeries, the row labels are suffixed.
For DeferredDataFrame, the column labels are suffixed.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>suffix</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – The string to add after each label.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">New DeferredSeries or DeferredDataFrame with updated labels.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.add_prefix" title="apache_beam.dataframe.frames.DeferredSeries.add_prefix"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.add_prefix()</span></code></a></dt>
<dd>Prefix row labels with string <cite>prefix</cite>.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add_prefix" title="apache_beam.dataframe.frames.DeferredDataFrame.add_prefix"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add_prefix()</span></code></a></dt>
<dd>Prefix column labels with string <cite>prefix</cite>.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">2 3</span>
<span class="go">3 4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">add_suffix</span><span class="p">(</span><span class="s1">&#39;_item&#39;</span><span class="p">)</span>
<span class="go">0_item 1</span>
<span class="go">1_item 2</span>
<span class="go">2_item 3</span>
<span class="go">3_item 4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go">   A  B</span>
<span class="go">0  1  3</span>
<span class="go">1  2  4</span>
<span class="go">2  3  5</span>
<span class="go">3  4  6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add_suffix</span><span class="p">(</span><span class="s1">&#39;_col&#39;</span><span class="p">)</span>
<span class="go">   A_col  B_col</span>
<span class="go">0      1      3</span>
<span class="go">1      2      4</span>
<span class="go">2      3      5</span>
<span class="go">3      4      6</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.add_prefix">
<code class="descname">add_prefix</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.add_prefix" title="Permalink to this definition"></a></dt>
<dd><p>Prefix labels with string <cite>prefix</cite>.</p>
<p>For DeferredSeries, the row labels are prefixed.
For DeferredDataFrame, the column labels are prefixed.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>prefix</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – The string to add before each label.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">New DeferredSeries or DeferredDataFrame with updated labels.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.add_suffix" title="apache_beam.dataframe.frames.DeferredSeries.add_suffix"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.add_suffix()</span></code></a></dt>
<dd>Suffix row labels with string <cite>suffix</cite>.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add_suffix" title="apache_beam.dataframe.frames.DeferredDataFrame.add_suffix"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add_suffix()</span></code></a></dt>
<dd>Suffix column labels with string <cite>suffix</cite>.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">2 3</span>
<span class="go">3 4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">add_prefix</span><span class="p">(</span><span class="s1">&#39;item_&#39;</span><span class="p">)</span>
<span class="go">item_0 1</span>
<span class="go">item_1 2</span>
<span class="go">item_2 3</span>
<span class="go">item_3 4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go">   A  B</span>
<span class="go">0  1  3</span>
<span class="go">1  2  4</span>
<span class="go">2  3  5</span>
<span class="go">3  4  6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add_prefix</span><span class="p">(</span><span class="s1">&#39;col_&#39;</span><span class="p">)</span>
<span class="go">   col_A  col_B</span>
<span class="go">0      1      3</span>
<span class="go">1      2      4</span>
<span class="go">2      3      5</span>
<span class="go">3      4      6</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.idxmin">
<code class="descname">idxmin</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.idxmin"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.idxmin" title="Permalink to this definition"></a></dt>
<dd><p>Return the row label of the minimum value.</p>
<p>If multiple values equal the minimum, the first row label with that
value is returned.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 0</em>) – For compatibility with DeferredDataFrame.idxmin. Redundant for application
on DeferredSeries.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If the entire DeferredSeries is NA, the result
will be NA.</li>
<li><strong>*args, **kwargs</strong> – <p>Additional arguments and keywords have no effect but might be
accepted for compatibility with NumPy.</p>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Label of the minimum value.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Index.html#pandas.Index" title="(in pandas v1.5.0.dev0+279.g7651c08230)">Index</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> – If the DeferredSeries is empty.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.argmin()</span></code></dt>
<dd>Return indices of the minimum values along the given axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmin" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmin()</span></code></a></dt>
<dd>Return index of first occurrence of minimum over requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmax" title="apache_beam.dataframe.frames.DeferredSeries.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmax()</span></code></a></dt>
<dd>Return index <em>label</em> of the first occurrence of maximum of values.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>This method is the DeferredSeries version of <code class="docutils literal notranslate"><span class="pre">ndarray.argmin</span></code>. This method
returns the label of the minimum, while <code class="docutils literal notranslate"><span class="pre">ndarray.argmin</span></code> returns
the position. To get the position, use <code class="docutils literal notranslate"><span class="pre">series.values.argmin()</span></code>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;D&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">A 1.0</span>
<span class="go">B NaN</span>
<span class="go">C 4.0</span>
<span class="go">D 1.0</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">idxmin</span><span class="p">()</span>
<span class="go">&#39;A&#39;</span>
<span class="go">If `skipna` is False and there is an NA value in the data,</span>
<span class="go">the function returns ``nan``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">idxmin</span><span class="p">(</span><span class="n">skipna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">nan</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.idxmax">
<code class="descname">idxmax</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.idxmax"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.idxmax" title="Permalink to this definition"></a></dt>
<dd><p>Return the row label of the maximum value.</p>
<p>If multiple values equal the maximum, the first row label with that
value is returned.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 0</em>) – For compatibility with DeferredDataFrame.idxmax. Redundant for application
on DeferredSeries.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If the entire DeferredSeries is NA, the result
will be NA.</li>
<li><strong>*args, **kwargs</strong> – <p>Additional arguments and keywords have no effect but might be
accepted for compatibility with NumPy.</p>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Label of the maximum value.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first">Index</p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> – If the DeferredSeries is empty.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.argmax()</span></code></dt>
<dd>Return indices of the maximum values along the given axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmax" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmax()</span></code></a></dt>
<dd>Return index of first occurrence of maximum over requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmin" title="apache_beam.dataframe.frames.DeferredSeries.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmin()</span></code></a></dt>
<dd>Return index <em>label</em> of the first occurrence of minimum of values.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>This method is the DeferredSeries version of <code class="docutils literal notranslate"><span class="pre">ndarray.argmax</span></code>. This method
returns the label of the maximum, while <code class="docutils literal notranslate"><span class="pre">ndarray.argmax</span></code> returns
the position. To get the position, use <code class="docutils literal notranslate"><span class="pre">series.values.argmax()</span></code>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;D&#39;</span><span class="p">,</span> <span class="s1">&#39;E&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">A 1.0</span>
<span class="go">B NaN</span>
<span class="go">C 4.0</span>
<span class="go">D 3.0</span>
<span class="go">E 4.0</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">idxmax</span><span class="p">()</span>
<span class="go">&#39;C&#39;</span>
<span class="go">If `skipna` is False and there is an NA value in the data,</span>
<span class="go">the function returns ``nan``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">idxmax</span><span class="p">(</span><span class="n">skipna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">nan</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.explode">
<code class="descname">explode</code><span class="sig-paren">(</span><em>ignore_index</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.explode"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.explode" title="Permalink to this definition"></a></dt>
<dd><p>Transform each element of a list-like to a row.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 0.25.0.</span></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>ignore_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>If True, the resulting index will be labeled 0, 1, …, n - 1.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">Exploded lists to rows; index will be duplicated for these rows.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.str.split()</span></code></dt>
<dd>Split string values on specified separator.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.unstack" title="apache_beam.dataframe.frames.DeferredSeries.unstack"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.unstack()</span></code></a></dt>
<dd>Unstack, a.k.a. pivot, DeferredSeries with MultiIndex to produce DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.melt" title="apache_beam.dataframe.frames.DeferredDataFrame.melt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.melt()</span></code></a></dt>
<dd>Unpivot a DeferredDataFrame from wide format to long format.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.explode" title="apache_beam.dataframe.frames.DeferredDataFrame.explode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.explode()</span></code></a></dt>
<dd>Explode a DeferredDataFrame from list-like columns to long format.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>This routine will explode list-likes including lists, tuples, sets,
DeferredSeries, and np.ndarray. The result dtype of the subset rows will
be object. Scalars will be returned unchanged, and empty list-likes will
result in a np.nan for that row. In addition, the ordering of elements in
the output will be non-deterministic when exploding sets.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="p">[],</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 [1, 2, 3]</span>
<span class="go">1 foo</span>
<span class="go">2 []</span>
<span class="go">3 [3, 4]</span>
<span class="go">dtype: object</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">explode</span><span class="p">()</span>
<span class="go">0 1</span>
<span class="go">0 2</span>
<span class="go">0 3</span>
<span class="go">1 foo</span>
<span class="go">2 NaN</span>
<span class="go">3 3</span>
<span class="go">3 4</span>
<span class="go">dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.dot">
<code class="descname">dot</code><span class="sig-paren">(</span><em>other</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.dot"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.dot" title="Permalink to this definition"></a></dt>
<dd><p>Compute the matrix multiplication between the DataFrame and other.</p>
<p>This method computes the matrix product between the DataFrame and the
values of another Series, DataFrame or a numpy array.</p>
<p>It can also be called using <code class="docutils literal notranslate"><span class="pre">self</span> <span class="pre">&#64;</span> <span class="pre">other</span></code> in Python &gt;= 3.5.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a><em> or </em><em>array-like</em>) – The other object to compute the matrix product with.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">If other is a DeferredSeries, return the matrix product between self and
other as a DeferredSeries. If other is a DeferredDataFrame or a numpy.array, return
the matrix product of self and other in a DeferredDataFrame or a np.array.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">other</span></code> must be a <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code></a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredSeries</span></code></a>
instance. Computing the dot product with an array-like is not supported
because it is order-sensitive.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.dot" title="apache_beam.dataframe.frames.DeferredSeries.dot"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.dot()</span></code></a></dt>
<dd>Similar method for DeferredSeries.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>The dimensions of DeferredDataFrame and other must be compatible in order to
compute the matrix multiplication. In addition, the column names of
DeferredDataFrame and the index of other must contain the same values, as they
will be aligned prior to the multiplication.</p>
<p>The dot method for DeferredSeries computes the inner product, instead of the
matrix product here.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Here we multiply a DataFrame with a Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">2</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
<span class="go">0 -4</span>
<span class="go">1 5</span>
<span class="go">dtype: int64</span>
<span class="go">Here we multiply a DataFrame with another DataFrame.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<span class="go"> 0 1</span>
<span class="go">0 1 4</span>
<span class="go">1 2 2</span>
<span class="go">Note that the dot method gives the same result as @</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">@</span> <span class="n">other</span>
<span class="go"> 0 1</span>
<span class="go">0 1 4</span>
<span class="go">1 2 2</span>
<span class="go">The dot method works also if other is an np.array.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">arr</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">arr</span><span class="p">)</span>
<span class="go"> 0 1</span>
<span class="go">0 1 4</span>
<span class="go">1 2 2</span>
<span class="go">Note how shuffling of the objects does not change the result.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s2</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">reindex</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">s2</span><span class="p">)</span>
<span class="go">0 -4</span>
<span class="go">1 5</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.nunique">
<code class="descname">nunique</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.nunique"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.nunique" title="Permalink to this definition"></a></dt>
<dd><p>Return number of unique elements in the object.</p>
<p>Excludes NA values by default.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>dropna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Don’t include NaN in the count.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"></td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)">int</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.nunique" title="apache_beam.dataframe.frames.DeferredDataFrame.nunique"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.nunique()</span></code></a></dt>
<dd>Method nunique for DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.count" title="apache_beam.dataframe.frames.DeferredSeries.count"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.count()</span></code></a></dt>
<dd>Count non-NA/null observations in the DeferredSeries.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">7</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 1</span>
<span class="go">1 3</span>
<span class="go">2 5</span>
<span class="go">3 7</span>
<span class="go">4 7</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">nunique</span><span class="p">()</span>
<span class="go">4</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.quantile">
<code class="descname">quantile</code><span class="sig-paren">(</span><em>q</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.quantile"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.quantile" title="Permalink to this definition"></a></dt>
<dd><p>Return value at the given quantile.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>q</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><em>array-like</em><em>, </em><em>default 0.5</em><em> (</em><em>50% quantile</em><em>)</em>) – The quantile(s) to compute, which can lie in range: 0 &lt;= q &lt;= 1.</li>
<li><strong>interpolation</strong> (<em>{'linear'</em><em>, </em><em>'lower'</em><em>, </em><em>'higher'</em><em>, </em><em>'midpoint'</em><em>, </em><em>'nearest'}</em>) – <p>This optional parameter specifies the interpolation method to use,
when the desired quantile lies between two data points <cite>i</cite> and <cite>j</cite>:</p>
<blockquote>
<div><ul>
<li>linear: <cite>i + (j - i) * fraction</cite>, where <cite>fraction</cite> is the
fractional part of the index surrounded by <cite>i</cite> and <cite>j</cite>.</li>
<li>lower: <cite>i</cite>.</li>
<li>higher: <cite>j</cite>.</li>
<li>nearest: <cite>i</cite> or <cite>j</cite> whichever is nearest.</li>
<li>midpoint: (<cite>i</cite> + <cite>j</cite>) / 2.</li>
</ul>
</div></blockquote>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If <code class="docutils literal notranslate"><span class="pre">q</span></code> is an array, a DeferredSeries will be returned where the
index is <code class="docutils literal notranslate"><span class="pre">q</span></code> and the values are the quantiles, otherwise
a float will be returned.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)">float</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>quantile is not parallelizable. See
<a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-12167">BEAM-12167</a> tracking
the possible addition of an approximate, parallelizable implementation of
quantile.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.window.Rolling.quantile()</span></code></dt>
<dd>Calculate the rolling quantile.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.percentile()</span></code></dt>
<dd>Returns the q-th percentile(s) of the array elements.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="mf">.5</span><span class="p">)</span>
<span class="go">2.5</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">quantile</span><span class="p">([</span><span class="mf">.25</span><span class="p">,</span> <span class="mf">.5</span><span class="p">,</span> <span class="mf">.75</span><span class="p">])</span>
<span class="go">0.25 1.75</span>
<span class="go">0.50 2.50</span>
<span class="go">0.75 3.25</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.std">
<code class="descname">std</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.std"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.std" title="Permalink to this definition"></a></dt>
<dd><p>Return sample standard deviation over requested axis.</p>
<p>Normalized by N-1 by default. This can be changed using the ddof argument.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – </li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If an entire row/column is NA, the result
will be NA.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>ddof</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 1</em>) – Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
where N represents the number of elements.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Notes</p>
<p>To have the same behaviour as <cite>numpy.std</cite>, use <cite>ddof=0</cite> (instead of the
default <cite>ddof=1</cite>).</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.var">
<code class="descname">var</code><span class="sig-paren">(</span><em>axis</em>, <em>skipna</em>, <em>level</em>, <em>ddof</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.var"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.var" title="Permalink to this definition"></a></dt>
<dd><p>Return unbiased variance over requested axis.</p>
<p>Normalized by N-1 by default. This can be changed using the ddof argument.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – </li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If an entire row/column is NA, the result
will be NA.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>ddof</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 1</em>) – Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
where N represents the number of elements.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Per-level aggregation is not yet supported (BEAM-11777). Only the
default, <code class="docutils literal notranslate"><span class="pre">level=None</span></code>, is allowed.</p>
<p class="rubric">Notes</p>
<p>To have the same behaviour as <cite>numpy.var</cite>, use <cite>ddof=0</cite> (instead of the
default <cite>ddof=1</cite>).</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.corr">
<code class="descname">corr</code><span class="sig-paren">(</span><em>other</em>, <em>method</em>, <em>min_periods</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.corr"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.corr" title="Permalink to this definition"></a></dt>
<dd><p>Compute correlation with <cite>other</cite> Series, excluding missing values.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a>) – DeferredSeries with which to compute the correlation.</li>
<li><strong>method</strong> (<em>{'pearson'</em><em>, </em><em>'kendall'</em><em>, </em><em>'spearman'}</em><em> or </em><em>callable</em>) – <p>Method used to compute correlation:</p>
<ul>
<li>pearson : Standard correlation coefficient</li>
<li>kendall : Kendall Tau correlation coefficient</li>
<li>spearman : Spearman rank correlation</li>
<li>callable: Callable with input two 1d ndarrays and returning a float.</li>
</ul>
<div class="admonition warning">
<p class="first admonition-title">Warning</p>
<p class="last">Note that the returned matrix from corr will have 1 along the
diagonals and will be symmetric regardless of the callable’s
behavior.</p>
</div>
</li>
<li><strong>min_periods</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – Minimum number of observations needed to have a valid result.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Correlation with other.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)">float</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">method='pearson'</span></code> is currently parallelizable.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.corr" title="apache_beam.dataframe.frames.DeferredDataFrame.corr"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.corr()</span></code></a></dt>
<dd>Compute pairwise correlation between columns.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.corrwith" title="apache_beam.dataframe.frames.DeferredDataFrame.corrwith"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.corrwith()</span></code></a></dt>
<dd>Compute pairwise correlation with another DeferredDataFrame or DeferredSeries.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="k">def</span> <span class="nf">histogram_intersection</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">):</span>
<span class="gp">... </span> <span class="n">v</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">minimum</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="n">decimals</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="gp">... </span> <span class="k">return</span> <span class="n">v</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mf">.2</span><span class="p">,</span> <span class="mf">.0</span><span class="p">,</span> <span class="mf">.6</span><span class="p">,</span> <span class="mf">.2</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mf">.3</span><span class="p">,</span> <span class="mf">.6</span><span class="p">,</span> <span class="mf">.0</span><span class="p">,</span> <span class="mf">.1</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span><span class="o">.</span><span class="n">corr</span><span class="p">(</span><span class="n">s2</span><span class="p">,</span> <span class="n">method</span><span class="o">=</span><span class="n">histogram_intersection</span><span class="p">)</span>
<span class="go">0.3</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.skew">
<code class="descname">skew</code><span class="sig-paren">(</span><em>axis</em>, <em>skipna</em>, <em>level</em>, <em>numeric_only</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.skew"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.skew" title="Permalink to this definition"></a></dt>
<dd><p>Return unbiased skew over requested axis.</p>
<p>Normalized by N-1.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.kurtosis">
<code class="descname">kurtosis</code><span class="sig-paren">(</span><em>axis</em>, <em>skipna</em>, <em>level</em>, <em>numeric_only</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.kurtosis"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.kurtosis" title="Permalink to this definition"></a></dt>
<dd><p>Return unbiased kurtosis over requested axis.</p>
<p>Kurtosis obtained using Fisher’s definition of
kurtosis (kurtosis of normal == 0.0). Normalized by N-1.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.kurt">
<code class="descname">kurt</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.kurt"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.kurt" title="Permalink to this definition"></a></dt>
<dd><p>Return unbiased kurtosis over requested axis.</p>
<p>Kurtosis obtained using Fisher’s definition of
kurtosis (kurtosis of normal == 0.0). Normalized by N-1.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.cov">
<code class="descname">cov</code><span class="sig-paren">(</span><em>other</em>, <em>min_periods</em>, <em>ddof</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.cov"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.cov" title="Permalink to this definition"></a></dt>
<dd><p>Compute covariance with Series, excluding missing values.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a>) – DeferredSeries with which to compute the covariance.</li>
<li><strong>min_periods</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – Minimum number of observations needed to have a valid result.</li>
<li><strong>ddof</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 1</em>) – <p>Delta degrees of freedom. The divisor used in calculations
is <code class="docutils literal notranslate"><span class="pre">N</span> <span class="pre">-</span> <span class="pre">ddof</span></code>, where <code class="docutils literal notranslate"><span class="pre">N</span></code> represents the number of elements.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Covariance between DeferredSeries and other normalized by N-1
(unbiased estimator).</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)">float</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.cov" title="apache_beam.dataframe.frames.DeferredDataFrame.cov"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.cov()</span></code></a></dt>
<dd>Compute pairwise covariance of columns.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mf">0.90010907</span><span class="p">,</span> <span class="mf">0.13484424</span><span class="p">,</span> <span class="mf">0.62036035</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mf">0.12528585</span><span class="p">,</span> <span class="mf">0.26962463</span><span class="p">,</span> <span class="mf">0.51111198</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span><span class="o">.</span><span class="n">cov</span><span class="p">(</span><span class="n">s2</span><span class="p">)</span>
<span class="go">-0.01685762652715874</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.dropna">
<code class="descname">dropna</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.dropna"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.dropna" title="Permalink to this definition"></a></dt>
<dd><p>Return a new Series with missing values removed.</p>
<p>See the <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/missing_data.html#missing-data" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">User Guide</span></a> for more on which values are
considered missing, and how to work with missing data.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'}</em><em>, </em><em>default 0</em>) – There is only one axis to drop values from.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, do operation inplace and return None.</li>
<li><strong>how</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – Not in use. Kept for compatibility.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredSeries with NA entries dropped from it or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.isna" title="apache_beam.dataframe.frames.DeferredSeries.isna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.isna()</span></code></a></dt>
<dd>Indicate missing values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.notna" title="apache_beam.dataframe.frames.DeferredSeries.notna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.notna()</span></code></a></dt>
<dd>Indicate existing (non-missing) values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.fillna" title="apache_beam.dataframe.frames.DeferredSeries.fillna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.fillna()</span></code></a></dt>
<dd>Replace missing values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.dropna" title="apache_beam.dataframe.frames.DeferredDataFrame.dropna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.dropna()</span></code></a></dt>
<dd>Drop rows or columns which contain NA values.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">Index.dropna()</span></code></dt>
<dd>Drop missing indices.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mf">1.</span><span class="p">,</span> <span class="mf">2.</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span>
<span class="go">0 1.0</span>
<span class="go">1 2.0</span>
<span class="go">2 NaN</span>
<span class="go">dtype: float64</span>
<span class="go">Drop NA values from a Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">dropna</span><span class="p">()</span>
<span class="go">0 1.0</span>
<span class="go">1 2.0</span>
<span class="go">dtype: float64</span>
<span class="go">Keep the Series with valid entries in the same variable.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span>
<span class="go">0 1.0</span>
<span class="go">1 2.0</span>
<span class="go">dtype: float64</span>
<span class="go">Empty strings are not considered NA values. None is considered an</span>
<span class="go">NA value.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">NaT</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="s1">&#39;I stay&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span>
<span class="go">0 NaN</span>
<span class="go">1 2</span>
<span class="go">2 NaT</span>
<span class="go">3</span>
<span class="go">4 None</span>
<span class="go">5 I stay</span>
<span class="go">dtype: object</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">dropna</span><span class="p">()</span>
<span class="go">1 2</span>
<span class="go">3</span>
<span class="go">5 I stay</span>
<span class="go">dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.set_axis">
<code class="descname">set_axis</code><span class="sig-paren">(</span><em>labels</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.set_axis"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.set_axis" title="Permalink to this definition"></a></dt>
<dd><p>Assign desired index to given axis.</p>
<p>Indexes for row labels can be changed by assigning
a list-like or Index.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>labels</strong> (<em>list-like</em><em>, </em><a class="reference internal" href="apache_beam.dataframe.partitionings.html#apache_beam.dataframe.partitionings.Index" title="apache_beam.dataframe.partitionings.Index"><em>Index</em></a>) – The values for the new index.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'}</em><em>, </em><em>default 0</em>) – The axis to update. The value 0 identifies the rows.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Whether to return a new DeferredSeries instance.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><strong>renamed</strong> – An object of type DeferredSeries or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rename_axis" title="apache_beam.dataframe.frames.DeferredSeries.rename_axis"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rename_axis()</span></code></a></dt>
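<dd>Alter the name of the index.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0    1</span>
<span class="go">1    2</span>
<span class="go">2    3</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">set_axis</span><span class="p">([</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a    1</span>
<span class="go">b    2</span>
<span class="go">c    3</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>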
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.isnull">
<code class="descname">isnull</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.isnull" title="Permalink to this definition"></a></dt>
<dd><p>Detect missing values.</p>
<p>Return a boolean same-sized object indicating if the values are NA.
NA values, such as None or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.NaN</span></code>, get mapped to True
values.
Everything else gets mapped to False values. Characters such as empty
strings <code class="docutils literal notranslate"><span class="pre">''</span></code> or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.inf</span></code> are not considered NA values
(unless you set <code class="docutils literal notranslate"><span class="pre">pandas.options.mode.use_inf_as_na</span> <span class="pre">=</span> <span class="pre">True</span></code>).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Mask of bool values for each element in DeferredSeries that
indicates whether an element is an NA value.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.isnull" title="apache_beam.dataframe.frames.DeferredSeries.isnull"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.isnull()</span></code></a></dt>
<dd>Alias of isna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.notna" title="apache_beam.dataframe.frames.DeferredSeries.notna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.notna()</span></code></a></dt>
<dd>Boolean inverse of isna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.dropna" title="apache_beam.dataframe.frames.DeferredSeries.dropna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.dropna()</span></code></a></dt>
<dd>Omit axes labels with missing values.</dd>
<dt><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.isna.html#pandas.isna" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.isna()</span></code></a></dt>
<dd>Top-level isna.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Show which entries in a DataFrame are NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="nb">dict</span><span class="p">(</span><span class="n">age</span><span class="o">=</span><span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">born</span><span class="o">=</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">NaT</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1939-05-27&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1940-04-25&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">name</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;Alfred&#39;</span><span class="p">,</span> <span class="s1">&#39;Batman&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">toy</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="s1">&#39;Batmobile&#39;</span><span class="p">,</span> <span class="s1">&#39;Joker&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> age born name toy</span>
<span class="go">0 5.0 NaT Alfred None</span>
<span class="go">1 6.0 1939-05-27 Batman Batmobile</span>
<span class="go">2 NaN 1940-04-25 Joker</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">isna</span><span class="p">()</span>
<span class="go"> age born name toy</span>
<span class="go">0 False True False True</span>
<span class="go">1 False False False False</span>
<span class="go">2 True False False False</span>
<span class="go">Show which entries in a Series are NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span>
<span class="go">0 5.0</span>
<span class="go">1 6.0</span>
<span class="go">2 NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">isna</span><span class="p">()</span>
<span class="go">0 False</span>
<span class="go">1 False</span>
<span class="go">2 True</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.isna">
<code class="descname">isna</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.isna" title="Permalink to this definition"></a></dt>
<dd><p>Detect missing values.</p>
<p>Return a boolean same-sized object indicating if the values are NA.
NA values, such as None or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.NaN</span></code>, get mapped to True
values.
Everything else gets mapped to False values. Characters such as empty
strings <code class="docutils literal notranslate"><span class="pre">''</span></code> or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.inf</span></code> are not considered NA values
(unless you set <code class="docutils literal notranslate"><span class="pre">pandas.options.mode.use_inf_as_na</span> <span class="pre">=</span> <span class="pre">True</span></code>).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Mask of bool values for each element in DeferredSeries that
indicates whether an element is an NA value.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.isnull" title="apache_beam.dataframe.frames.DeferredSeries.isnull"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.isnull()</span></code></a></dt>
<dd>Alias of isna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.notna" title="apache_beam.dataframe.frames.DeferredSeries.notna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.notna()</span></code></a></dt>
<dd>Boolean inverse of isna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.dropna" title="apache_beam.dataframe.frames.DeferredSeries.dropna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.dropna()</span></code></a></dt>
<dd>Omit axes labels with missing values.</dd>
<dt><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.isna.html#pandas.isna" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.isna()</span></code></a></dt>
<dd>Top-level isna.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Show which entries in a DataFrame are NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="nb">dict</span><span class="p">(</span><span class="n">age</span><span class="o">=</span><span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">born</span><span class="o">=</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">NaT</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1939-05-27&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1940-04-25&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">name</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;Alfred&#39;</span><span class="p">,</span> <span class="s1">&#39;Batman&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">toy</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="s1">&#39;Batmobile&#39;</span><span class="p">,</span> <span class="s1">&#39;Joker&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> age born name toy</span>
<span class="go">0 5.0 NaT Alfred None</span>
<span class="go">1 6.0 1939-05-27 Batman Batmobile</span>
<span class="go">2 NaN 1940-04-25 Joker</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">isna</span><span class="p">()</span>
<span class="go"> age born name toy</span>
<span class="go">0 False True False True</span>
<span class="go">1 False False False False</span>
<span class="go">2 True False False False</span>
<span class="go">Show which entries in a Series are NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span>
<span class="go">0 5.0</span>
<span class="go">1 6.0</span>
<span class="go">2 NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">isna</span><span class="p">()</span>
<span class="go">0 False</span>
<span class="go">1 False</span>
<span class="go">2 True</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.notnull">
<code class="descname">notnull</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.notnull" title="Permalink to this definition"></a></dt>
<dd><p>Detect existing (non-missing) values.</p>
<p>Return a boolean same-sized object indicating if the values are not NA.
Non-missing values get mapped to True. Characters such as empty
strings <code class="docutils literal notranslate"><span class="pre">''</span></code> or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.inf</span></code> are not considered NA values
(unless you set <code class="docutils literal notranslate"><span class="pre">pandas.options.mode.use_inf_as_na</span> <span class="pre">=</span> <span class="pre">True</span></code>).
NA values, such as None or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.NaN</span></code>, get mapped to False
values.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Mask of bool values for each element in DeferredSeries that
indicates whether an element is not an NA value.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.notnull" title="apache_beam.dataframe.frames.DeferredSeries.notnull"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.notnull()</span></code></a></dt>
<dd>Alias of notna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.isna" title="apache_beam.dataframe.frames.DeferredSeries.isna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.isna()</span></code></a></dt>
<dd>Boolean inverse of notna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.dropna" title="apache_beam.dataframe.frames.DeferredSeries.dropna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.dropna()</span></code></a></dt>
<dd>Omit axes labels with missing values.</dd>
<dt><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.notna.html#pandas.notna" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.notna()</span></code></a></dt>
<dd>Top-level notna.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Show which entries in a DataFrame are not NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="nb">dict</span><span class="p">(</span><span class="n">age</span><span class="o">=</span><span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">born</span><span class="o">=</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">NaT</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1939-05-27&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1940-04-25&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">name</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;Alfred&#39;</span><span class="p">,</span> <span class="s1">&#39;Batman&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">toy</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="s1">&#39;Batmobile&#39;</span><span class="p">,</span> <span class="s1">&#39;Joker&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> age born name toy</span>
<span class="go">0 5.0 NaT Alfred None</span>
<span class="go">1 6.0 1939-05-27 Batman Batmobile</span>
<span class="go">2 NaN 1940-04-25 Joker</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">notna</span><span class="p">()</span>
<span class="go"> age born name toy</span>
<span class="go">0 True False True False</span>
<span class="go">1 True True True True</span>
<span class="go">2 False True True True</span>
<span class="go">Show which entries in a Series are not NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span>
<span class="go">0 5.0</span>
<span class="go">1 6.0</span>
<span class="go">2 NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">notna</span><span class="p">()</span>
<span class="go">0 True</span>
<span class="go">1 True</span>
<span class="go">2 False</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.notna">
<code class="descname">notna</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.notna" title="Permalink to this definition"></a></dt>
<dd><p>Detect existing (non-missing) values.</p>
<p>Return a boolean same-sized object indicating if the values are not NA.
Non-missing values get mapped to True. Characters such as empty
strings <code class="docutils literal notranslate"><span class="pre">''</span></code> or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.inf</span></code> are not considered NA values
(unless you set <code class="docutils literal notranslate"><span class="pre">pandas.options.mode.use_inf_as_na</span> <span class="pre">=</span> <span class="pre">True</span></code>).
NA values, such as None or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.NaN</span></code>, get mapped to False
values.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Mask of bool values for each element in DeferredSeries that
indicates whether an element is not an NA value.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.notnull" title="apache_beam.dataframe.frames.DeferredSeries.notnull"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.notnull()</span></code></a></dt>
<dd>Alias of notna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.isna" title="apache_beam.dataframe.frames.DeferredSeries.isna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.isna()</span></code></a></dt>
<dd>Boolean inverse of notna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.dropna" title="apache_beam.dataframe.frames.DeferredSeries.dropna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.dropna()</span></code></a></dt>
<dd>Omit axes labels with missing values.</dd>
<dt><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.notna.html#pandas.notna" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.notna()</span></code></a></dt>
<dd>Top-level notna.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Show which entries in a DataFrame are not NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="nb">dict</span><span class="p">(</span><span class="n">age</span><span class="o">=</span><span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">born</span><span class="o">=</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">NaT</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1939-05-27&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1940-04-25&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">name</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;Alfred&#39;</span><span class="p">,</span> <span class="s1">&#39;Batman&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">toy</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="s1">&#39;Batmobile&#39;</span><span class="p">,</span> <span class="s1">&#39;Joker&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> age born name toy</span>
<span class="go">0 5.0 NaT Alfred None</span>
<span class="go">1 6.0 1939-05-27 Batman Batmobile</span>
<span class="go">2 NaN 1940-04-25 Joker</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">notna</span><span class="p">()</span>
<span class="go"> age born name toy</span>
<span class="go">0 True False True False</span>
<span class="go">1 True True True True</span>
<span class="go">2 False True True True</span>
<span class="go">Show which entries in a Series are not NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span>
<span class="go">0 5.0</span>
<span class="go">1 6.0</span>
<span class="go">2 NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">notna</span><span class="p">()</span>
<span class="go">0 True</span>
<span class="go">1 True</span>
<span class="go">2 False</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.items">
<code class="descname">items</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.items" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.items.html#pandas.Series.items" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.items()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.iteritems">
<code class="descname">iteritems</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.iteritems" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.iteritems.html#pandas.Series.iteritems" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.iteritems()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.tolist">
<code class="descname">tolist</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.tolist" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.tolist.html#pandas.Series.tolist" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.tolist()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_numpy">
<code class="descname">to_numpy</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_numpy" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.to_numpy.html#pandas.Series.to_numpy" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.to_numpy()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_string">
<code class="descname">to_string</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_string" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.to_string.html#pandas.Series.to_string" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.to_string()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.duplicated">
<code class="descname">duplicated</code><span class="sig-paren">(</span><em>keep</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.duplicated"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.duplicated" title="Permalink to this definition"></a></dt>
<dd><p>Indicate duplicate Series values.</p>
<p>Duplicated values are indicated as <code class="docutils literal notranslate"><span class="pre">True</span></code> values in the resulting
Series. Either all duplicates, all except the first or all except the
last occurrence of duplicates can be indicated.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>keep</strong> (<em>{'first'</em><em>, </em><em>'last'</em><em>, </em><em>False}</em><em>, </em><em>default 'first'</em>) – <p>Method to handle dropping duplicates:</p>
<ul class="simple">
<li><code class="docutils literal notranslate"><span class="pre">'first'</span></code> : Mark duplicates as <code class="docutils literal notranslate"><span class="pre">True</span></code> except for the first
occurrence.</li>
<li><code class="docutils literal notranslate"><span class="pre">'last'</span></code> : Mark duplicates as <code class="docutils literal notranslate"><span class="pre">True</span></code> except for the last
occurrence.</li>
<li><code class="docutils literal notranslate"><span class="pre">False</span></code> : Mark all duplicates as <code class="docutils literal notranslate"><span class="pre">True</span></code>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">DeferredSeries indicating whether each value has occurred in the
preceding values.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a>[<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)">bool</a>]</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">keep=False</span></code> and <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> are supported. Other values of
<code class="docutils literal notranslate"><span class="pre">keep</span></code> make this an order-sensitive operation. Note <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> is
a Beam-specific option that guarantees only one duplicate will be kept, but
unlike <code class="docutils literal notranslate"><span class="pre">&quot;first&quot;</span></code> and <code class="docutils literal notranslate"><span class="pre">&quot;last&quot;</span></code> it makes no guarantees about <em>which</em>
duplicate element is kept.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">Index.duplicated()</span></code></dt>
<dd>Equivalent method on pandas.Index.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.duplicated" title="apache_beam.dataframe.frames.DeferredDataFrame.duplicated"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.duplicated()</span></code></a></dt>
<dd>Equivalent method on DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.drop_duplicates" title="apache_beam.dataframe.frames.DeferredSeries.drop_duplicates"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.drop_duplicates()</span></code></a></dt>
<dd>Remove duplicate values from DeferredSeries.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">By default, for each set of duplicated values, the first occurrence is</span>
<span class="go">set on False and all others on True:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">animals</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s1">&#39;lama&#39;</span><span class="p">,</span> <span class="s1">&#39;cow&#39;</span><span class="p">,</span> <span class="s1">&#39;lama&#39;</span><span class="p">,</span> <span class="s1">&#39;beetle&#39;</span><span class="p">,</span> <span class="s1">&#39;lama&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">animals</span><span class="o">.</span><span class="n">duplicated</span><span class="p">()</span>
<span class="go">0 False</span>
<span class="go">1 False</span>
<span class="go">2 True</span>
<span class="go">3 False</span>
<span class="go">4 True</span>
<span class="go">dtype: bool</span>
<span class="go">which is equivalent to</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">animals</span><span class="o">.</span><span class="n">duplicated</span><span class="p">(</span><span class="n">keep</span><span class="o">=</span><span class="s1">&#39;first&#39;</span><span class="p">)</span>
<span class="go">0 False</span>
<span class="go">1 False</span>
<span class="go">2 True</span>
<span class="go">3 False</span>
<span class="go">4 True</span>
<span class="go">dtype: bool</span>
<span class="go">By using &#39;last&#39;, the last occurrence of each set of duplicated values</span>
<span class="go">is set on False and all others on True:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">animals</span><span class="o">.</span><span class="n">duplicated</span><span class="p">(</span><span class="n">keep</span><span class="o">=</span><span class="s1">&#39;last&#39;</span><span class="p">)</span>
<span class="go">0 True</span>
<span class="go">1 False</span>
<span class="go">2 True</span>
<span class="go">3 False</span>
<span class="go">4 False</span>
<span class="go">dtype: bool</span>
<span class="go">By setting keep on ``False``, all duplicates are True:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">animals</span><span class="o">.</span><span class="n">duplicated</span><span class="p">(</span><span class="n">keep</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">0 True</span>
<span class="go">1 False</span>
<span class="go">2 True</span>
<span class="go">3 False</span>
<span class="go">4 True</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.drop_duplicates">
<code class="descname">drop_duplicates</code><span class="sig-paren">(</span><em>keep</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.drop_duplicates"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.drop_duplicates" title="Permalink to this definition"></a></dt>
<dd><p>Return Series with duplicate values removed.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>keep</strong> ({‘first’, ‘last’, <code class="docutils literal notranslate"><span class="pre">False</span></code>}, default ‘first’) – <p>Method to handle dropping duplicates:</p>
<ul>
<li>‘first’ : Drop duplicates except for the first occurrence.</li>
<li>‘last’ : Drop duplicates except for the last occurrence.</li>
<li><code class="docutils literal notranslate"><span class="pre">False</span></code> : Drop all duplicates.</li>
</ul>
</li>
<li><strong>inplace</strong> (bool, default <code class="docutils literal notranslate"><span class="pre">False</span></code>) – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, performs operation inplace and returns None.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredSeries with duplicates dropped or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">keep=False</span></code> and <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> are supported. Other values of
<code class="docutils literal notranslate"><span class="pre">keep</span></code> make this an order-sensitive operation. Note <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> is
a Beam-specific option that guarantees only one duplicate will be kept, but
unlike <code class="docutils literal notranslate"><span class="pre">&quot;first&quot;</span></code> and <code class="docutils literal notranslate"><span class="pre">&quot;last&quot;</span></code> it makes no guarantees about <em>which</em>
duplicate element is kept.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">Index.drop_duplicates()</span></code></dt>
<dd>Equivalent method on Index.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.drop_duplicates" title="apache_beam.dataframe.frames.DeferredDataFrame.drop_duplicates"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.drop_duplicates()</span></code></a></dt>
<dd>Equivalent method on DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.duplicated" title="apache_beam.dataframe.frames.DeferredSeries.duplicated"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.duplicated()</span></code></a></dt>
<dd>Related method on DeferredSeries, indicating duplicate DeferredSeries values.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Generate a Series with duplicated entries.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s1">&#39;lama&#39;</span><span class="p">,</span> <span class="s1">&#39;cow&#39;</span><span class="p">,</span> <span class="s1">&#39;lama&#39;</span><span class="p">,</span> <span class="s1">&#39;beetle&#39;</span><span class="p">,</span> <span class="s1">&#39;lama&#39;</span><span class="p">,</span> <span class="s1">&#39;hippo&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">name</span><span class="o">=</span><span class="s1">&#39;animal&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 lama</span>
<span class="go">1 cow</span>
<span class="go">2 lama</span>
<span class="go">3 beetle</span>
<span class="go">4 lama</span>
<span class="go">5 hippo</span>
<span class="go">Name: animal, dtype: object</span>
<span class="go">With the &#39;keep&#39; parameter, the selection behaviour of duplicated values</span>
<span class="go">can be changed. The value &#39;first&#39; keeps the first occurrence for each</span>
<span class="go">set of duplicated entries. The default value of keep is &#39;first&#39;.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">drop_duplicates</span><span class="p">()</span>
<span class="go">0 lama</span>
<span class="go">1 cow</span>
<span class="go">3 beetle</span>
<span class="go">5 hippo</span>
<span class="go">Name: animal, dtype: object</span>
<span class="go">The value &#39;last&#39; for parameter &#39;keep&#39; keeps the last occurrence for</span>
<span class="go">each set of duplicated entries.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">drop_duplicates</span><span class="p">(</span><span class="n">keep</span><span class="o">=</span><span class="s1">&#39;last&#39;</span><span class="p">)</span>
<span class="go">1 cow</span>
<span class="go">3 beetle</span>
<span class="go">4 lama</span>
<span class="go">5 hippo</span>
<span class="go">Name: animal, dtype: object</span>
<span class="go">The value ``False`` for parameter &#39;keep&#39; discards all sets of</span>
<span class="go">duplicated entries. Setting the value of &#39;inplace&#39; to ``True`` performs</span>
<span class="go">the operation inplace and returns ``None``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">drop_duplicates</span><span class="p">(</span><span class="n">keep</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">1 cow</span>
<span class="go">3 beetle</span>
<span class="go">5 hippo</span>
<span class="go">Name: animal, dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.sample">
<code class="descname">sample</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.sample"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.sample" title="Permalink to this definition"></a></dt>
<dd><p>Return a random sample of items from an axis of object.</p>
<p>You can use <cite>random_state</cite> for reproducibility.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>n</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – Number of items from axis to return. Cannot be used with <cite>frac</cite>.
Default = 1 if <cite>frac</cite> = None.</li>
<li><strong>frac</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em>, </em><em>optional</em>) – Fraction of axis items to return. Cannot be used with <cite>n</cite>.</li>
<li><strong>replace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Allow or disallow sampling of the same row more than once.</li>
<li><strong>weights</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>ndarray-like</em><em>, </em><em>optional</em>) – Default ‘None’ results in equal probability weighting.
If passed a DeferredSeries, will align with target object on index. Index
values in weights not found in sampled object will be ignored and
index values in sampled object not in weights will be assigned
weights of zero.
If called on a DeferredDataFrame, will accept the name of a column
when axis = 0.
Unless weights are a DeferredSeries, weights must be same length as axis
being sampled.
If weights do not sum to 1, they will be normalized to sum to 1.
Missing values in the weights column will be treated as zero.
Infinite values not allowed.</li>
<li><strong>random_state</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>array-like</em><em>, </em><em>BitGenerator</em><em>, </em><em>np.random.RandomState</em><em>, </em><em>optional</em>) – <p>If int, array-like, or BitGenerator (NumPy&gt;=1.17), seed for
random number generator.
If np.random.RandomState, use as numpy RandomState object.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.1.0: </span>array-like and BitGenerator (for NumPy&gt;=1.17) object now passed to
np.random.RandomState() as seed</p>
</div>
</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>‘index’</em><em>, </em><em>1</em><em> or </em><em>‘columns’</em><em>, </em><em>None}</em><em>, </em><em>default None</em>) – Axis to sample. Accepts axis number or name. Default is stat axis
for given data type (0 for DeferredSeries and DeferredDataFrames).</li>
<li><strong>ignore_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>If True, the resulting index will be labeled 0, 1, …, n - 1.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.3.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A new object of same type as caller containing <cite>n</cite> items randomly
sampled from the caller object.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">n</span></code> and/or <code class="docutils literal notranslate"><span class="pre">weights</span></code> may be specified. <code class="docutils literal notranslate"><span class="pre">frac</span></code>,
<code class="docutils literal notranslate"><span class="pre">random_state</span></code>, and <code class="docutils literal notranslate"><span class="pre">replace=True</span></code> are not yet supported.
See <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-12476">BEAM-12476</a>.</p>
<p>Note that pandas will raise an error if <code class="docutils literal notranslate"><span class="pre">n</span></code> is larger than the length
of the dataset, while the Beam DataFrame API will simply return the full
dataset in that case.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrameGroupBy.sample()</span></code></dt>
<dd>Generates random samples from each group of a DeferredDataFrame object.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeriesGroupBy.sample()</span></code></dt>
<dd>Generates random samples from each group of a DeferredSeries object.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.random.choice()</span></code></dt>
<dd>Generates a random sample from a given 1-D numpy array.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>If <cite>frac</cite> &gt; 1, <cite>replace</cite> should be set to <cite>True</cite>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;num_legs&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;num_wings&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;num_specimen_seen&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">10</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">8</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="s1">&#39;spider&#39;</span><span class="p">,</span> <span class="s1">&#39;fish&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> num_legs num_wings num_specimen_seen</span>
<span class="go">falcon 2 2 10</span>
<span class="go">dog 4 0 2</span>
<span class="go">spider 8 0 1</span>
<span class="go">fish 0 0 8</span>
<span class="go">Extract 3 random elements from the ``Series`` ``df[&#39;num_legs&#39;]``:</span>
<span class="go">Note that we use `random_state` to ensure the reproducibility of</span>
<span class="go">the examples.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;num_legs&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">n</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">fish 0</span>
<span class="go">spider 8</span>
<span class="go">falcon 2</span>
<span class="go">Name: num_legs, dtype: int64</span>
<span class="go">A random 50% sample of the ``DataFrame`` with replacement:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">frac</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span> <span class="n">replace</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> num_legs num_wings num_specimen_seen</span>
<span class="go">dog 4 0 2</span>
<span class="go">fish 0 0 8</span>
<span class="go">An upsample sample of the ``DataFrame`` with replacement:</span>
<span class="go">Note that `replace` parameter has to be `True` for `frac` parameter &gt; 1.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">frac</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">replace</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> num_legs num_wings num_specimen_seen</span>
<span class="go">dog 4 0 2</span>
<span class="go">fish 0 0 8</span>
<span class="go">falcon 2 2 10</span>
<span class="go">falcon 2 2 10</span>
<span class="go">fish 0 0 8</span>
<span class="go">dog 4 0 2</span>
<span class="go">fish 0 0 8</span>
<span class="go">dog 4 0 2</span>
<span class="go">Using a DataFrame column as weights. Rows with larger value in the</span>
<span class="go">`num_specimen_seen` column are more likely to be sampled.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">n</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">weights</span><span class="o">=</span><span class="s1">&#39;num_specimen_seen&#39;</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> num_legs num_wings num_specimen_seen</span>
<span class="go">falcon 2 2 10</span>
<span class="go">fish 0 0 8</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.aggregate">
<code class="descname">aggregate</code><span class="sig-paren">(</span><em>func</em>, <em>axis</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.aggregate"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.aggregate" title="Permalink to this definition"></a></dt>
<dd><p>Aggregate using one or more operations over the specified axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>func</strong> (<em>function</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a>) – <p>Function to use for aggregating the data. If a function, must either
work when passed a DeferredSeries or when passed to DeferredSeries.apply.</p>
<p>Accepted combinations are:</p>
<ul>
<li>function</li>
<li>string function name</li>
<li>list of functions and/or function names, e.g. <code class="docutils literal notranslate"><span class="pre">[np.sum,</span> <span class="pre">'mean']</span></code></li>
<li>dict of axis labels -&gt; functions, function names or list of such.</li>
</ul>
</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'}</em>) – Parameter needed for compatibility with DeferredDataFrame.</li>
<li><strong>*args</strong> – Positional arguments to pass to <cite>func</cite>.</li>
<li><strong>**kwargs</strong> – Keyword arguments to pass to <cite>func</cite>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The return can be:</p>
<ul class="simple">
<li>scalar : when DeferredSeries.agg is called with a single function</li>
<li>DeferredSeries : when DeferredDataFrame.agg is called with a single function</li>
<li>DeferredDataFrame : when DeferredDataFrame.agg is called with several functions</li>
</ul>
<p>Return scalar, DeferredSeries or DeferredDataFrame.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar, <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Some aggregation methods cannot be parallelized, and computing
them will require collecting all data on a single machine.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.apply" title="apache_beam.dataframe.frames.DeferredSeries.apply"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.apply()</span></code></a></dt>
<dd>Invoke function on a DeferredSeries.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.transform" title="apache_beam.dataframe.frames.DeferredSeries.transform"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.transform()</span></code></a></dt>
<dd>Transform function producing a DeferredSeries with like indexes.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p><cite>agg</cite> is an alias for <cite>aggregate</cite>. Use the alias.</p>
<p>Functions that mutate the passed object can produce unexpected
behavior or errors and are not supported. See <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/gotchas.html#gotchas-udf-mutation" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span>Mutating with User Defined Function (UDF) methods</span></a>
for more details.</p>
<p>A passed user-defined-function will be passed a DeferredSeries for evaluation.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">2 3</span>
<span class="go">3 4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="s1">&#39;min&#39;</span><span class="p">)</span>
<span class="go">1</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">agg</span><span class="p">([</span><span class="s1">&#39;min&#39;</span><span class="p">,</span> <span class="s1">&#39;max&#39;</span><span class="p">])</span>
<span class="go">min 1</span>
<span class="go">max 4</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.agg">
<code class="descname">agg</code><span class="sig-paren">(</span><em>func</em>, <em>axis</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.agg" title="Permalink to this definition"></a></dt>
<dd><p>Aggregate using one or more operations over the specified axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>func</strong> (<em>function</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a>) – <p>Function to use for aggregating the data. If a function, must either
work when passed a DeferredSeries or when passed to DeferredSeries.apply.</p>
<p>Accepted combinations are:</p>
<ul>
<li>function</li>
<li>string function name</li>
<li>list of functions and/or function names, e.g. <code class="docutils literal notranslate"><span class="pre">[np.sum,</span> <span class="pre">'mean']</span></code></li>
<li>dict of axis labels -&gt; functions, function names or list of such.</li>
</ul>
</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'}</em>) – Parameter needed for compatibility with DeferredDataFrame.</li>
<li><strong>*args</strong> – Positional arguments to pass to <cite>func</cite>.</li>
<li><strong>**kwargs</strong> – Keyword arguments to pass to <cite>func</cite>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The return can be:</p>
<ul class="simple">
<li>scalar : when DeferredSeries.agg is called with single function</li>
<li>DeferredSeries : when DeferredDataFrame.agg is called with a single function</li>
<li>DeferredDataFrame : when DeferredDataFrame.agg is called with several functions</li>
</ul>
<p>Return scalar, DeferredSeries or DeferredDataFrame.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar, <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Some aggregation methods cannot be parallelized, and computing
them will require collecting all data on a single machine.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.apply" title="apache_beam.dataframe.frames.DeferredSeries.apply"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.apply()</span></code></a></dt>
<dd>Invoke function on a DeferredSeries.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.transform" title="apache_beam.dataframe.frames.DeferredSeries.transform"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.transform()</span></code></a></dt>
<dd>Transform function producing a DeferredSeries with like indexes.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p><cite>agg</cite> is an alias for <cite>aggregate</cite>. Use the alias.</p>
<p>Functions that mutate the passed object can produce unexpected
behavior or errors and are not supported. See <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/gotchas.html#gotchas-udf-mutation" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span>Mutating with User Defined Function (UDF) methods</span></a>
for more details.</p>
<p>A passed user-defined-function will be passed a DeferredSeries for evaluation.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">2 3</span>
<span class="go">3 4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="s1">&#39;min&#39;</span><span class="p">)</span>
<span class="go">1</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">agg</span><span class="p">([</span><span class="s1">&#39;min&#39;</span><span class="p">,</span> <span class="s1">&#39;max&#39;</span><span class="p">])</span>
<span class="go">min 1</span>
<span class="go">max 4</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.axes">
<code class="descname">axes</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.axes" title="Permalink to this definition"></a></dt>
<dd><p>Return a list of the row axis labels.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.clip">
<code class="descname">clip</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.clip" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.all">
<code class="descname">all</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.all" title="Permalink to this definition"></a></dt>
<dd><p>Return whether all elements are True, potentially over an axis.</p>
<p>Returns True unless there is at least one element within a series or
along a Dataframe axis that is False or equivalent (e.g. zero or
empty).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'</em><em>, </em><em>None}</em><em>, </em><em>default 0</em>) – <p>Indicate which axis or axes should be reduced.</p>
<ul>
<li>0 / ‘index’ : reduce the index, return a DeferredSeries whose index is the
original column labels.</li>
<li>1 / ‘columns’ : reduce the columns, return a DeferredSeries whose index is the
original index.</li>
<li>None : reduce all axes, return a scalar.</li>
</ul>
</li>
<li><strong>bool_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only boolean columns. If None, will attempt to use everything,
then use only boolean data. Not implemented for DeferredSeries.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If the entire row/column is NA and skipna is
True, then the result will be True, as for an empty row/column.
If skipna is False, then NA are treated as True, because these are not
equal to zero.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>**kwargs</strong> (<em>any</em><em>, </em><em>default None</em>) – Additional keywords have no effect but might be accepted for
compatibility with NumPy.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If level is specified, then a DeferredSeries is returned; otherwise, a scalar
is returned.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.all" title="apache_beam.dataframe.frames.DeferredSeries.all"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.all()</span></code></a></dt>
<dd>Return True if all elements are True.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.any" title="apache_beam.dataframe.frames.DeferredDataFrame.any"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.any()</span></code></a></dt>
<dd>Return True if one (or more) elements are True.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">**Series**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">True</span><span class="p">,</span> <span class="kc">True</span><span class="p">])</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">])</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">skipna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">True</span>
<span class="go">**DataFrames**</span>
<span class="go">Create a dataframe from a dictionary.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">True</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> col1 col2</span>
<span class="go">0 True True</span>
<span class="go">1 True False</span>
<span class="go">Default behaviour checks if column-wise values all return True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
<span class="go">col1 True</span>
<span class="go">col2 False</span>
<span class="go">dtype: bool</span>
<span class="go">Specify ``axis=&#39;columns&#39;`` to check if row-wise values all return True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go">0 True</span>
<span class="go">1 False</span>
<span class="go">dtype: bool</span>
<span class="go">Or ``axis=None`` for whether every value is True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="go">False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.any">
<code class="descname">any</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.any" title="Permalink to this definition"></a></dt>
<dd><p>Return whether any element is True, potentially over an axis.</p>
<p>Returns False unless there is at least one element within a series or
along a Dataframe axis that is True or equivalent (e.g. non-zero or
non-empty).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'</em><em>, </em><em>None}</em><em>, </em><em>default 0</em>) – <p>Indicate which axis or axes should be reduced.</p>
<ul>
<li>0 / ‘index’ : reduce the index, return a DeferredSeries whose index is the
original column labels.</li>
<li>1 / ‘columns’ : reduce the columns, return a DeferredSeries whose index is the
original index.</li>
<li>None : reduce all axes, return a scalar.</li>
</ul>
</li>
<li><strong>bool_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only boolean columns. If None, will attempt to use everything,
then use only boolean data. Not implemented for DeferredSeries.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If the entire row/column is NA and skipna is
True, then the result will be False, as for an empty row/column.
If skipna is False, then NA are treated as True, because these are not
equal to zero.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>**kwargs</strong> (<em>any</em><em>, </em><em>default None</em>) – Additional keywords have no effect but might be accepted for
compatibility with NumPy.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If level is specified, then a DeferredSeries is returned; otherwise, a scalar
is returned.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.any()</span></code></dt>
<dd>Numpy version of this method.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.any" title="apache_beam.dataframe.frames.DeferredSeries.any"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.any()</span></code></a></dt>
<dd>Return whether any element is True.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.all" title="apache_beam.dataframe.frames.DeferredSeries.all"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.all()</span></code></a></dt>
<dd>Return whether all elements are True.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.any" title="apache_beam.dataframe.frames.DeferredDataFrame.any"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.any()</span></code></a></dt>
<dd>Return whether any element is True over requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.all" title="apache_beam.dataframe.frames.DeferredDataFrame.all"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.all()</span></code></a></dt>
<dd>Return whether all elements are True over requested axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">**Series**</span>
<span class="go">For Series input, the output is a scalar indicating whether any element</span>
<span class="go">is True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">False</span><span class="p">,</span> <span class="kc">False</span><span class="p">])</span><span class="o">.</span><span class="n">any</span><span class="p">()</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">])</span><span class="o">.</span><span class="n">any</span><span class="p">()</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">()</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">any</span><span class="p">()</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">skipna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">True</span>
<span class="go">**DataFrame**</span>
<span class="go">Whether each column contains at least one True element (the default).</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;A&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s2">&quot;B&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s2">&quot;C&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B C</span>
<span class="go">0 1 0 0</span>
<span class="go">1 2 2 0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">any</span><span class="p">()</span>
<span class="go">A True</span>
<span class="go">B True</span>
<span class="go">C False</span>
<span class="go">dtype: bool</span>
<span class="go">Aggregating over the columns.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;A&quot;</span><span class="p">:</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">],</span> <span class="s2">&quot;B&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">0 True 1</span>
<span class="go">1 False 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go">0 True</span>
<span class="go">1 True</span>
<span class="go">dtype: bool</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;A&quot;</span><span class="p">:</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">],</span> <span class="s2">&quot;B&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">0 True 1</span>
<span class="go">1 False 0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go">0 True</span>
<span class="go">1 False</span>
<span class="go">dtype: bool</span>
<span class="go">Aggregating over the entire DataFrame with ``axis=None``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="go">True</span>
<span class="go">`any` for an empty DataFrame is an empty Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([])</span><span class="o">.</span><span class="n">any</span><span class="p">()</span>
<span class="go">Series([], dtype: bool)</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.count">
<code class="descname">count</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.count" title="Permalink to this definition"></a></dt>
<dd><p>Return number of non-NA/null observations in the Series.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a smaller DeferredSeries.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">Number of non-null values in the DeferredSeries.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)">int</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.count" title="apache_beam.dataframe.frames.DeferredDataFrame.count"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.count()</span></code></a></dt>
<dd>Count non-NA cells for each column or row.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mf">0.0</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">count</span><span class="p">()</span>
<span class="go">2</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.describe">
<code class="descname">describe</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.describe" title="Permalink to this definition"></a></dt>
<dd><p>Generate descriptive statistics.</p>
<p>Descriptive statistics include those that summarize the central
tendency, dispersion and shape of a
dataset’s distribution, excluding <code class="docutils literal notranslate"><span class="pre">NaN</span></code> values.</p>
<p>Analyzes both numeric and object series, as well
as <code class="docutils literal notranslate"><span class="pre">DataFrame</span></code> column sets of mixed data types. The output
will vary depending on what is provided. Refer to the notes
below for more detail.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>percentiles</strong> (<em>list-like of numbers</em><em>, </em><em>optional</em>) – The percentiles to include in the output. All should
fall between 0 and 1. The default is
<code class="docutils literal notranslate"><span class="pre">[.25,</span> <span class="pre">.5,</span> <span class="pre">.75]</span></code>, which returns the 25th, 50th, and
75th percentiles.</li>
<li><strong>include</strong> (<em>'all'</em><em>, </em><em>list-like of dtypes</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> (</em><em>default</em><em>)</em><em>, </em><em>optional</em>) – <p>A white list of data types to include in the result. Ignored
for <code class="docutils literal notranslate"><span class="pre">DeferredSeries</span></code>. Here are the options:</p>
<ul>
<li>‘all’ : All columns of the input will be included in the output.</li>
<li>A list-like of dtypes : Limits the results to the
provided data types.
To limit the result to numeric types submit
<code class="docutils literal notranslate"><span class="pre">numpy.number</span></code>. To limit it instead to object columns submit
the <code class="docutils literal notranslate"><span class="pre">numpy.object</span></code> data type. Strings
can also be used in the style of
<code class="docutils literal notranslate"><span class="pre">select_dtypes</span></code> (e.g. <code class="docutils literal notranslate"><span class="pre">df.describe(include=['O'])</span></code>). To
select pandas categorical columns, use <code class="docutils literal notranslate"><span class="pre">'category'</span></code></li>
<li>None (default) : The result will include all numeric columns.</li>
</ul>
</li>
<li><strong>exclude</strong> (<em>list-like of dtypes</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> (</em><em>default</em><em>)</em><em>, </em><em>optional</em><em>,</em>) – <p>A black list of data types to omit from the result. Ignored
for <code class="docutils literal notranslate"><span class="pre">DeferredSeries</span></code>. Here are the options:</p>
<ul>
<li>A list-like of dtypes : Excludes the provided data types
from the result. To exclude numeric types submit
<code class="docutils literal notranslate"><span class="pre">numpy.number</span></code>. To exclude object columns submit the data
type <code class="docutils literal notranslate"><span class="pre">numpy.object</span></code>. Strings can also be used in the style of
<code class="docutils literal notranslate"><span class="pre">select_dtypes</span></code> (e.g. <code class="docutils literal notranslate"><span class="pre">df.describe(exclude=['O'])</span></code>). To
exclude pandas categorical columns, use <code class="docutils literal notranslate"><span class="pre">'category'</span></code></li>
<li>None (default) : The result will exclude nothing.</li>
</ul>
</li>
<li><strong>datetime_is_numeric</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>Whether to treat datetime dtypes as numeric. This affects statistics
calculated for the column. For DeferredDataFrame input, this also
controls whether datetime columns are included by default.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Summary statistics of the DeferredSeries or DeferredDataFrame provided.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">describe</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.count" title="apache_beam.dataframe.frames.DeferredDataFrame.count"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.count()</span></code></a></dt>
<dd>Count number of non-NA/null observations.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.max" title="apache_beam.dataframe.frames.DeferredDataFrame.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.max()</span></code></a></dt>
<dd>Maximum of the values in the object.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.min" title="apache_beam.dataframe.frames.DeferredDataFrame.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.min()</span></code></a></dt>
<dd>Minimum of the values in the object.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mean" title="apache_beam.dataframe.frames.DeferredDataFrame.mean"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mean()</span></code></a></dt>
<dd>Mean of the values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.std" title="apache_beam.dataframe.frames.DeferredDataFrame.std"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.std()</span></code></a></dt>
<dd>Standard deviation of the observations.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.select_dtypes" title="apache_beam.dataframe.frames.DeferredDataFrame.select_dtypes"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.select_dtypes()</span></code></a></dt>
<dd>Subset of a DeferredDataFrame including/excluding columns based on their dtype.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>For numeric data, the result’s index will include <code class="docutils literal notranslate"><span class="pre">count</span></code>,
<code class="docutils literal notranslate"><span class="pre">mean</span></code>, <code class="docutils literal notranslate"><span class="pre">std</span></code>, <code class="docutils literal notranslate"><span class="pre">min</span></code>, <code class="docutils literal notranslate"><span class="pre">max</span></code> as well as lower, <code class="docutils literal notranslate"><span class="pre">50</span></code> and
upper percentiles. By default the lower percentile is <code class="docutils literal notranslate"><span class="pre">25</span></code> and the
upper percentile is <code class="docutils literal notranslate"><span class="pre">75</span></code>. The <code class="docutils literal notranslate"><span class="pre">50</span></code> percentile is the
same as the median.</p>
<p>For object data (e.g. strings or timestamps), the result’s index
will include <code class="docutils literal notranslate"><span class="pre">count</span></code>, <code class="docutils literal notranslate"><span class="pre">unique</span></code>, <code class="docutils literal notranslate"><span class="pre">top</span></code>, and <code class="docutils literal notranslate"><span class="pre">freq</span></code>. The <code class="docutils literal notranslate"><span class="pre">top</span></code>
is the most common value. The <code class="docutils literal notranslate"><span class="pre">freq</span></code> is the most common value’s
frequency. Timestamps also include the <code class="docutils literal notranslate"><span class="pre">first</span></code> and <code class="docutils literal notranslate"><span class="pre">last</span></code> items.</p>
<p>If multiple object values have the highest count, then the
<code class="docutils literal notranslate"><span class="pre">count</span></code> and <code class="docutils literal notranslate"><span class="pre">top</span></code> results will be arbitrarily chosen from
among those with the highest count.</p>
<p>For mixed data types provided via a <code class="docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>, the default is to
return only an analysis of numeric columns. If the dataframe consists
only of object and categorical data without any numeric columns, the
default is to return an analysis of both the object and categorical
columns. If <code class="docutils literal notranslate"><span class="pre">include='all'</span></code> is provided as an option, the result
will include a union of attributes of each type.</p>
<p>The <cite>include</cite> and <cite>exclude</cite> parameters can be used to limit
which columns in a <code class="docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code> are analyzed for the output.
The parameters are ignored when analyzing a <code class="docutils literal notranslate"><span class="pre">DeferredSeries</span></code>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Describing a numeric ``Series``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">describe</span><span class="p">()</span>
<span class="go">count 3.0</span>
<span class="go">mean 2.0</span>
<span class="go">std 1.0</span>
<span class="go">min 1.0</span>
<span class="go">25% 1.5</span>
<span class="go">50% 2.0</span>
<span class="go">75% 2.5</span>
<span class="go">max 3.0</span>
<span class="go">dtype: float64</span>
<span class="go">Describing a categorical ``Series``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">describe</span><span class="p">()</span>
<span class="go">count 4</span>
<span class="go">unique 3</span>
<span class="go">top a</span>
<span class="go">freq 2</span>
<span class="go">dtype: object</span>
<span class="go">Describing a timestamp ``Series``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span>
<span class="gp">... </span> <span class="n">np</span><span class="o">.</span><span class="n">datetime64</span><span class="p">(</span><span class="s2">&quot;2000-01-01&quot;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">np</span><span class="o">.</span><span class="n">datetime64</span><span class="p">(</span><span class="s2">&quot;2010-01-01&quot;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">np</span><span class="o">.</span><span class="n">datetime64</span><span class="p">(</span><span class="s2">&quot;2010-01-01&quot;</span><span class="p">)</span>
<span class="gp">... </span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">datetime_is_numeric</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go">count 3</span>
<span class="go">mean 2006-09-01 08:00:00</span>
<span class="go">min 2000-01-01 00:00:00</span>
<span class="go">25% 2004-12-31 12:00:00</span>
<span class="go">50% 2010-01-01 00:00:00</span>
<span class="go">75% 2010-01-01 00:00:00</span>
<span class="go">max 2010-01-01 00:00:00</span>
<span class="go">dtype: object</span>
<span class="go">Describing a ``DataFrame``. By default only numeric fields</span>
<span class="go">are returned.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;categorical&#39;</span><span class="p">:</span> <span class="n">pd</span><span class="o">.</span><span class="n">Categorical</span><span class="p">([</span><span class="s1">&#39;d&#39;</span><span class="p">,</span><span class="s1">&#39;e&#39;</span><span class="p">,</span><span class="s1">&#39;f&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="s1">&#39;numeric&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;object&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">]</span>
<span class="gp">... </span> <span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">()</span>
<span class="go"> numeric</span>
<span class="go">count 3.0</span>
<span class="go">mean 2.0</span>
<span class="go">std 1.0</span>
<span class="go">min 1.0</span>
<span class="go">25% 1.5</span>
<span class="go">50% 2.0</span>
<span class="go">75% 2.5</span>
<span class="go">max 3.0</span>
<span class="go">Describing all columns of a ``DataFrame`` regardless of data type.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="s1">&#39;all&#39;</span><span class="p">)</span>
<span class="go"> categorical numeric object</span>
<span class="go">count 3 3.0 3</span>
<span class="go">unique 3 NaN 3</span>
<span class="go">top f NaN a</span>
<span class="go">freq 1 NaN 1</span>
<span class="go">mean NaN 2.0 NaN</span>
<span class="go">std NaN 1.0 NaN</span>
<span class="go">min NaN 1.0 NaN</span>
<span class="go">25% NaN 1.5 NaN</span>
<span class="go">50% NaN 2.0 NaN</span>
<span class="go">75% NaN 2.5 NaN</span>
<span class="go">max NaN 3.0 NaN</span>
<span class="go">Describing a column from a ``DataFrame`` by accessing it as</span>
<span class="go">an attribute.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">numeric</span><span class="o">.</span><span class="n">describe</span><span class="p">()</span>
<span class="go">count 3.0</span>
<span class="go">mean 2.0</span>
<span class="go">std 1.0</span>
<span class="go">min 1.0</span>
<span class="go">25% 1.5</span>
<span class="go">50% 2.0</span>
<span class="go">75% 2.5</span>
<span class="go">max 3.0</span>
<span class="go">Name: numeric, dtype: float64</span>
<span class="go">Including only numeric columns in a ``DataFrame`` description.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">number</span><span class="p">])</span>
<span class="go"> numeric</span>
<span class="go">count 3.0</span>
<span class="go">mean 2.0</span>
<span class="go">std 1.0</span>
<span class="go">min 1.0</span>
<span class="go">25% 1.5</span>
<span class="go">50% 2.0</span>
<span class="go">75% 2.5</span>
<span class="go">max 3.0</span>
<span class="go">Including only string columns in a ``DataFrame`` description.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="p">[</span><span class="nb">object</span><span class="p">])</span>
<span class="go"> object</span>
<span class="go">count 3</span>
<span class="go">unique 3</span>
<span class="go">top a</span>
<span class="go">freq 1</span>
<span class="go">Including only categorical columns from a ``DataFrame`` description.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;category&#39;</span><span class="p">])</span>
<span class="go"> categorical</span>
<span class="go">count 3</span>
<span class="go">unique 3</span>
<span class="go">top d</span>
<span class="go">freq 1</span>
<span class="go">Excluding numeric columns from a ``DataFrame`` description.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">exclude</span><span class="o">=</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">number</span><span class="p">])</span>
<span class="go"> categorical object</span>
<span class="go">count 3 3</span>
<span class="go">unique 3 3</span>
<span class="go">top f a</span>
<span class="go">freq 1 1</span>
<span class="go">Excluding object columns from a ``DataFrame`` description.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">exclude</span><span class="o">=</span><span class="p">[</span><span class="nb">object</span><span class="p">])</span>
<span class="go"> categorical numeric</span>
<span class="go">count 3 3.0</span>
<span class="go">unique 3 NaN</span>
<span class="go">top f NaN</span>
<span class="go">freq 1 NaN</span>
<span class="go">mean NaN 2.0</span>
<span class="go">std NaN 1.0</span>
<span class="go">min NaN 1.0</span>
<span class="go">25% NaN 1.5</span>
<span class="go">50% NaN 2.0</span>
<span class="go">75% NaN 2.5</span>
<span class="go">max NaN 3.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.min">
<code class="descname">min</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.min" title="Permalink to this definition"></a></dt>
<dd><p>Return the minimum of the values over the requested axis.</p>
<p>If you want the <em>index</em> of the minimum, use <code class="docutils literal notranslate"><span class="pre">idxmin</span></code>. This is the equivalent of the <code class="docutils literal notranslate"><span class="pre">numpy.ndarray</span></code> method <code class="docutils literal notranslate"><span class="pre">argmin</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sum" title="apache_beam.dataframe.frames.DeferredSeries.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sum()</span></code></a></dt>
<dd>Return the sum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.min" title="apache_beam.dataframe.frames.DeferredSeries.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.min()</span></code></a></dt>
<dd>Return the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.max" title="apache_beam.dataframe.frames.DeferredSeries.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.max()</span></code></a></dt>
<dd>Return the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmin" title="apache_beam.dataframe.frames.DeferredSeries.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmax" title="apache_beam.dataframe.frames.DeferredSeries.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sum" title="apache_beam.dataframe.frames.DeferredDataFrame.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sum()</span></code></a></dt>
<dd>Return the sum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.min" title="apache_beam.dataframe.frames.DeferredDataFrame.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.min()</span></code></a></dt>
<dd>Return the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.max" title="apache_beam.dataframe.frames.DeferredDataFrame.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.max()</span></code></a></dt>
<dd>Return the maximum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmin" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmax" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum over the requested axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">idx</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">([</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;warm&#39;</span><span class="p">,</span> <span class="s1">&#39;warm&#39;</span><span class="p">,</span> <span class="s1">&#39;cold&#39;</span><span class="p">,</span> <span class="s1">&#39;cold&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;fish&#39;</span><span class="p">,</span> <span class="s1">&#39;spider&#39;</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;blooded&#39;</span><span class="p">,</span> <span class="s1">&#39;animal&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">8</span><span class="p">],</span> <span class="n">name</span><span class="o">=</span><span class="s1">&#39;legs&#39;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="n">idx</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">blooded animal</span>
<span class="go">warm dog 4</span>
<span class="go"> falcon 2</span>
<span class="go">cold fish 0</span>
<span class="go"> spider 8</span>
<span class="go">Name: legs, dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">min</span><span class="p">()</span>
<span class="go">0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.max">
<code class="descname">max</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.max" title="Permalink to this definition"></a></dt>
<dd><p>Return the maximum of the values over the requested axis.</p>
<p>If you want the <em>index</em> of the maximum, use <code class="docutils literal notranslate"><span class="pre">idxmax</span></code>. This is the equivalent of the <code class="docutils literal notranslate"><span class="pre">numpy.ndarray</span></code> method <code class="docutils literal notranslate"><span class="pre">argmax</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sum" title="apache_beam.dataframe.frames.DeferredSeries.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sum()</span></code></a></dt>
<dd>Return the sum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.min" title="apache_beam.dataframe.frames.DeferredSeries.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.min()</span></code></a></dt>
<dd>Return the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.max" title="apache_beam.dataframe.frames.DeferredSeries.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.max()</span></code></a></dt>
<dd>Return the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmin" title="apache_beam.dataframe.frames.DeferredSeries.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmax" title="apache_beam.dataframe.frames.DeferredSeries.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sum" title="apache_beam.dataframe.frames.DeferredDataFrame.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sum()</span></code></a></dt>
<dd>Return the sum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.min" title="apache_beam.dataframe.frames.DeferredDataFrame.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.min()</span></code></a></dt>
<dd>Return the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.max" title="apache_beam.dataframe.frames.DeferredDataFrame.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.max()</span></code></a></dt>
<dd>Return the maximum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmin" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmax" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum over the requested axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">idx</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">([</span>
<span class="gp">... </span>    <span class="p">[</span><span class="s1">&#39;warm&#39;</span><span class="p">,</span> <span class="s1">&#39;warm&#39;</span><span class="p">,</span> <span class="s1">&#39;cold&#39;</span><span class="p">,</span> <span class="s1">&#39;cold&#39;</span><span class="p">],</span>
<span class="gp">... </span>    <span class="p">[</span><span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;fish&#39;</span><span class="p">,</span> <span class="s1">&#39;spider&#39;</span><span class="p">]],</span>
<span class="gp">... </span>    <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;blooded&#39;</span><span class="p">,</span> <span class="s1">&#39;animal&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">8</span><span class="p">],</span> <span class="n">name</span><span class="o">=</span><span class="s1">&#39;legs&#39;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="n">idx</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">blooded  animal</span>
<span class="go">warm     dog       4</span>
<span class="go">         falcon    2</span>
<span class="go">cold     fish      0</span>
<span class="go">         spider    8</span>
<span class="go">Name: legs, dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
<span class="go">8</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.prod">
<code class="descname">prod</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.prod" title="Permalink to this definition"></a></dt>
<dd><p>Return the product of the values over the requested axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>min_count</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 0</em>) – The required number of valid values to perform the operation. If fewer than
<code class="docutils literal notranslate"><span class="pre">min_count</span></code> non-NA values are present the result will be NA.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sum" title="apache_beam.dataframe.frames.DeferredSeries.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sum()</span></code></a></dt>
<dd>Return the sum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.min" title="apache_beam.dataframe.frames.DeferredSeries.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.min()</span></code></a></dt>
<dd>Return the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.max" title="apache_beam.dataframe.frames.DeferredSeries.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.max()</span></code></a></dt>
<dd>Return the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmin" title="apache_beam.dataframe.frames.DeferredSeries.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmax" title="apache_beam.dataframe.frames.DeferredSeries.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sum" title="apache_beam.dataframe.frames.DeferredDataFrame.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sum()</span></code></a></dt>
<dd>Return the sum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.min" title="apache_beam.dataframe.frames.DeferredDataFrame.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.min()</span></code></a></dt>
<dd>Return the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.max" title="apache_beam.dataframe.frames.DeferredDataFrame.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.max()</span></code></a></dt>
<dd>Return the maximum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmin" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmax" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum over the requested axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<p>By default, the product of an empty or all-NA Series is <code class="docutils literal notranslate"><span class="pre">1</span></code>.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">prod</span><span class="p">()</span>
<span class="go">1.0</span>
</pre></div>
</div>
<p>This can be controlled with the <code class="docutils literal notranslate"><span class="pre">min_count</span></code> parameter.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">prod</span><span class="p">(</span><span class="n">min_count</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">nan</span>
</pre></div>
</div>
<p>Thanks to the <code class="docutils literal notranslate"><span class="pre">skipna</span></code> parameter, <code class="docutils literal notranslate"><span class="pre">min_count</span></code> handles all-NA and
empty series identically.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">prod</span><span class="p">()</span>
<span class="go">1.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">prod</span><span class="p">(</span><span class="n">min_count</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">nan</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.product">
<code class="descname">product</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.product" title="Permalink to this definition"></a></dt>
<dd><p>Return the product of the values over the requested axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>min_count</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 0</em>) – The required number of valid values to perform the operation. If fewer than
<code class="docutils literal notranslate"><span class="pre">min_count</span></code> non-NA values are present the result will be NA.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sum" title="apache_beam.dataframe.frames.DeferredSeries.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sum()</span></code></a></dt>
<dd>Return the sum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.min" title="apache_beam.dataframe.frames.DeferredSeries.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.min()</span></code></a></dt>
<dd>Return the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.max" title="apache_beam.dataframe.frames.DeferredSeries.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.max()</span></code></a></dt>
<dd>Return the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmin" title="apache_beam.dataframe.frames.DeferredSeries.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmax" title="apache_beam.dataframe.frames.DeferredSeries.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sum" title="apache_beam.dataframe.frames.DeferredDataFrame.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sum()</span></code></a></dt>
<dd>Return the sum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.min" title="apache_beam.dataframe.frames.DeferredDataFrame.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.min()</span></code></a></dt>
<dd>Return the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.max" title="apache_beam.dataframe.frames.DeferredDataFrame.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.max()</span></code></a></dt>
<dd>Return the maximum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmin" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmax" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum over the requested axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<p>By default, the product of an empty or all-NA Series is <code class="docutils literal notranslate"><span class="pre">1</span></code>.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">prod</span><span class="p">()</span>
<span class="go">1.0</span>
</pre></div>
</div>
<p>This can be controlled with the <code class="docutils literal notranslate"><span class="pre">min_count</span></code> parameter.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">prod</span><span class="p">(</span><span class="n">min_count</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">nan</span>
</pre></div>
</div>
<p>Thanks to the <code class="docutils literal notranslate"><span class="pre">skipna</span></code> parameter, <code class="docutils literal notranslate"><span class="pre">min_count</span></code> handles all-NA and
empty series identically.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">prod</span><span class="p">()</span>
<span class="go">1.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">prod</span><span class="p">(</span><span class="n">min_count</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">nan</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.sum">
<code class="descname">sum</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.sum" title="Permalink to this definition"></a></dt>
<dd><p>Return the sum of the values over the requested axis.</p>
<p>This is equivalent to the method <code class="docutils literal notranslate"><span class="pre">numpy.sum</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>min_count</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 0</em>) – The required number of valid values to perform the operation. If fewer than
<code class="docutils literal notranslate"><span class="pre">min_count</span></code> non-NA values are present the result will be NA.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sum" title="apache_beam.dataframe.frames.DeferredSeries.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sum()</span></code></a></dt>
<dd>Return the sum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.min" title="apache_beam.dataframe.frames.DeferredSeries.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.min()</span></code></a></dt>
<dd>Return the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.max" title="apache_beam.dataframe.frames.DeferredSeries.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.max()</span></code></a></dt>
<dd>Return the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmin" title="apache_beam.dataframe.frames.DeferredSeries.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmax" title="apache_beam.dataframe.frames.DeferredSeries.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sum" title="apache_beam.dataframe.frames.DeferredDataFrame.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sum()</span></code></a></dt>
<dd>Return the sum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.min" title="apache_beam.dataframe.frames.DeferredDataFrame.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.min()</span></code></a></dt>
<dd>Return the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.max" title="apache_beam.dataframe.frames.DeferredDataFrame.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.max()</span></code></a></dt>
<dd>Return the maximum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmin" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmax" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum over the requested axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">idx</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">([</span>
<span class="gp">... </span>    <span class="p">[</span><span class="s1">&#39;warm&#39;</span><span class="p">,</span> <span class="s1">&#39;warm&#39;</span><span class="p">,</span> <span class="s1">&#39;cold&#39;</span><span class="p">,</span> <span class="s1">&#39;cold&#39;</span><span class="p">],</span>
<span class="gp">... </span>    <span class="p">[</span><span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;fish&#39;</span><span class="p">,</span> <span class="s1">&#39;spider&#39;</span><span class="p">]],</span>
<span class="gp">... </span>    <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;blooded&#39;</span><span class="p">,</span> <span class="s1">&#39;animal&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">8</span><span class="p">],</span> <span class="n">name</span><span class="o">=</span><span class="s1">&#39;legs&#39;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="n">idx</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">blooded  animal</span>
<span class="go">warm     dog       4</span>
<span class="go">         falcon    2</span>
<span class="go">cold     fish      0</span>
<span class="go">         spider    8</span>
<span class="go">Name: legs, dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="go">14</span>
</pre></div>
</div>
<p>By default, the sum of an empty or all-NA Series is <code class="docutils literal notranslate"><span class="pre">0</span></code>.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="c1"># min_count=0 is the default</span>
<span class="go">0.0</span>
</pre></div>
</div>
<p>This can be controlled with the <code class="docutils literal notranslate"><span class="pre">min_count</span></code> parameter. For example, if
you'd like the sum of an empty series to be NaN, pass <code class="docutils literal notranslate"><span class="pre">min_count=1</span></code>.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">min_count</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">nan</span>
</pre></div>
</div>
<p>Thanks to the <code class="docutils literal notranslate"><span class="pre">skipna</span></code> parameter, <code class="docutils literal notranslate"><span class="pre">min_count</span></code> handles all-NA and
empty series identically.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="go">0.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">min_count</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">nan</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.mean">
<code class="descname">mean</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.mean" title="Permalink to this definition"></a></dt>
<dd><p>Return the mean of the values over the requested axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">mean</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.median">
<code class="descname">median</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.median" title="Permalink to this definition"></a></dt>
<dd><p>Return the median of the values over the requested axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">median</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.sem">
<code class="descname">sem</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.sem" title="Permalink to this definition"></a></dt>
<dd><p>Return unbiased standard error of the mean over requested axis.</p>
<p>Normalized by N-1 by default. This can be changed using the ddof argument.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If an entire row/column is NA, the result
will be NA.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
<li><strong>ddof</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 1</em>) – Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
where N represents the number of elements.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">sem</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
<p class="rubric">Notes</p>
<p>To have the same behaviour as <cite>numpy.std</cite>, use <cite>ddof=0</cite> (instead of the
default <cite>ddof=1</cite>).</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.mad">
<code class="descname">mad</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.mad" title="Permalink to this definition"></a></dt>
<dd><p>Return the mean absolute deviation of the values over the requested axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a scalar.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">scalar or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">mad</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.argmax">
<code class="descname">argmax</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.argmax" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.argmax.html#pandas.Series.argmax" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.argmax()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.argmin">
<code class="descname">argmin</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.argmin" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.argmin.html#pandas.Series.argmin" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.argmin()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.cummax">
<code class="descname">cummax</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.cummax" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.cummax.html#pandas.Series.cummax" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.cummax()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.cummin">
<code class="descname">cummin</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.cummin" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.cummin.html#pandas.Series.cummin" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.cummin()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.cumprod">
<code class="descname">cumprod</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.cumprod" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.cumprod.html#pandas.Series.cumprod" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.cumprod()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.cumsum">
<code class="descname">cumsum</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.cumsum" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.cumsum.html#pandas.Series.cumsum" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.cumsum()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.diff">
<code class="descname">diff</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.diff" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.diff.html#pandas.Series.diff" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.diff()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.interpolate">
<code class="descname">interpolate</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.interpolate" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.interpolate.html#pandas.Series.interpolate" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.interpolate()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.searchsorted">
<code class="descname">searchsorted</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.searchsorted" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.searchsorted.html#pandas.Series.searchsorted" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.searchsorted()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.shift">
<code class="descname">shift</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.shift" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.shift.html#pandas.Series.shift" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.shift()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.pct_change">
<code class="descname">pct_change</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.pct_change" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.pct_change.html#pandas.Series.pct_change" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.pct_change()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.is_monotonic">
<code class="descname">is_monotonic</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.is_monotonic" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.is_monotonic()</span></code> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.is_monotonic_increasing">
<code class="descname">is_monotonic_increasing</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.is_monotonic_increasing" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.is_monotonic_increasing()</span></code> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.is_monotonic_decreasing">
<code class="descname">is_monotonic_decreasing</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.is_monotonic_decreasing" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.is_monotonic_decreasing()</span></code> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.asof">
<code class="descname">asof</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.asof" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.asof.html#pandas.Series.asof" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.asof()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.first_valid_index">
<code class="descname">first_valid_index</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.first_valid_index" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.first_valid_index.html#pandas.Series.first_valid_index" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.first_valid_index()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.last_valid_index">
<code class="descname">last_valid_index</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.last_valid_index" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.last_valid_index.html#pandas.Series.last_valid_index" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.last_valid_index()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.autocorr">
<code class="descname">autocorr</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.autocorr" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.autocorr.html#pandas.Series.autocorr" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.autocorr()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.iat">
<code class="descname">iat</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.iat" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.iat</span></code> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.head">
<code class="descname">head</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.head" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.head.html#pandas.Series.head" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.head()</span></code></a> is not yet supported in the Beam DataFrame API because it is <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">order-sensitive</a>.</p>
<p>If you want to peek at a large dataset consider using interactive Beam’s <a class="reference internal" href="apache_beam.runners.interactive.interactive_beam.html#apache_beam.runners.interactive.interactive_beam.collect" title="apache_beam.runners.interactive.interactive_beam.collect"><code class="xref py py-func docutils literal notranslate"><span class="pre">ib.collect</span></code></a> with <code class="docutils literal notranslate"><span class="pre">n</span></code> specified, or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sample" title="apache_beam.dataframe.frames.DeferredSeries.sample"><code class="xref py py-meth docutils literal notranslate"><span class="pre">sample()</span></code></a>. If you want to find the N largest elements, consider using <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.nlargest" title="apache_beam.dataframe.frames.DeferredDataFrame.nlargest"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.nlargest()</span></code></a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.tail">
<code class="descname">tail</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.tail" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.tail.html#pandas.Series.tail" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.tail()</span></code></a> is not yet supported in the Beam DataFrame API because it is <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">order-sensitive</a>.</p>
<p>If you want to peek at a large dataset consider using interactive Beam’s <a class="reference internal" href="apache_beam.runners.interactive.interactive_beam.html#apache_beam.runners.interactive.interactive_beam.collect" title="apache_beam.runners.interactive.interactive_beam.collect"><code class="xref py py-func docutils literal notranslate"><span class="pre">ib.collect</span></code></a> with <code class="docutils literal notranslate"><span class="pre">n</span></code> specified, or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sample" title="apache_beam.dataframe.frames.DeferredSeries.sample"><code class="xref py py-meth docutils literal notranslate"><span class="pre">sample()</span></code></a>. If you want to find the N largest elements, consider using <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.nlargest" title="apache_beam.dataframe.frames.DeferredDataFrame.nlargest"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.nlargest()</span></code></a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.filter">
<code class="descname">filter</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.filter" title="Permalink to this definition"></a></dt>
<dd><p>Subset the dataframe rows or columns according to the specified index labels.</p>
<p>Note that this routine does not filter a dataframe on its
contents. The filter is applied to the labels of the index.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>items</strong> (<em>list-like</em>) – Keep labels from axis which are in items.</li>
<li><strong>like</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – Keep labels from axis for which “like in label == True”.</li>
<li><strong>regex</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> (</em><em>regular expression</em><em>)</em>) – Keep labels from axis for which re.search(regex, label) == True.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>‘index’</em><em>, </em><em>1</em><em> or </em><em>‘columns’</em><em>, </em><em>None}</em><em>, </em><em>default None</em>) – The axis to filter on, expressed either as an index (int)
or axis name (str). By default this is the info axis,
‘index’ for DeferredSeries, ‘columns’ for DeferredDataFrame.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">same type as input object</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.loc" title="apache_beam.dataframe.frames.DeferredDataFrame.loc"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.loc()</span></code></a></dt>
<dd>Access a group of rows and columns by label(s) or a boolean array.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>The <code class="docutils literal notranslate"><span class="pre">items</span></code>, <code class="docutils literal notranslate"><span class="pre">like</span></code>, and <code class="docutils literal notranslate"><span class="pre">regex</span></code> parameters are
enforced to be mutually exclusive.</p>
<p><code class="docutils literal notranslate"><span class="pre">axis</span></code> defaults to the info axis that is used when indexing
with <code class="docutils literal notranslate"><span class="pre">[]</span></code>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">])),</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;mouse&#39;</span><span class="p">,</span> <span class="s1">&#39;rabbit&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;one&#39;</span><span class="p">,</span> <span class="s1">&#39;two&#39;</span><span class="p">,</span> <span class="s1">&#39;three&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> one two three</span>
<span class="go">mouse 1 2 3</span>
<span class="go">rabbit 4 5 6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1"># select columns by name</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">items</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;one&#39;</span><span class="p">,</span> <span class="s1">&#39;three&#39;</span><span class="p">])</span>
<span class="go"> one three</span>
<span class="go">mouse 1 3</span>
<span class="go">rabbit 4 6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1"># select columns by regular expression</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">regex</span><span class="o">=</span><span class="s1">&#39;e$&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> one three</span>
<span class="go">mouse 1 3</span>
<span class="go">rabbit 4 6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1"># select rows containing &#39;bbi&#39;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">like</span><span class="o">=</span><span class="s1">&#39;bbi&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> one two three</span>
<span class="go">rabbit 4 5 6</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.memory_usage">
<code class="descname">memory_usage</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.memory_usage" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.memory_usage.html#pandas.Series.memory_usage" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.memory_usage()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.nbytes">
<code class="descname">nbytes</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.nbytes" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.nbytes()</span></code> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_list">
<code class="descname">to_list</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_list" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.to_list.html#pandas.Series.to_list" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.to_list()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.factorize">
<code class="descname">factorize</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.factorize" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.factorize.html#pandas.Series.factorize" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.factorize()</span></code></a> is not yet supported in the Beam DataFrame API because the columns in the output DataFrame depend on the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-columns">https://s.apache.org/dataframe-non-deferred-columns</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.nlargest">
<code class="descname">nlargest</code><span class="sig-paren">(</span><em>keep</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.nlargest"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.nlargest" title="Permalink to this definition"></a></dt>
<dd><p>Return the largest <cite>n</cite> elements.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>n</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 5</em>) – Return this many descending sorted values.</li>
<li><strong>keep</strong> (<em>{'first'</em><em>, </em><em>'last'</em><em>, </em><em>'all'}</em><em>, </em><em>default 'first'</em>) – <p>When there are duplicate values that cannot all fit in a
DeferredSeries of <cite>n</cite> elements:</p>
<ul>
<li><dl class="first docutils">
<dt><code class="docutils literal notranslate"><span class="pre">first</span></code> <span class="classifier-delimiter">:</span> <span class="classifier">return the first <cite>n</cite> occurrences in order</span></dt>
<dd>of appearance.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt><code class="docutils literal notranslate"><span class="pre">last</span></code> <span class="classifier-delimiter">:</span> <span class="classifier">return the last <cite>n</cite> occurrences in reverse</span></dt>
<dd>order of appearance.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt><code class="docutils literal notranslate"><span class="pre">all</span></code> <span class="classifier-delimiter">:</span> <span class="classifier">keep all occurrences. This can result in a DeferredSeries of</span></dt>
<dd>size larger than <cite>n</cite>.</dd>
</dl>
</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The <cite>n</cite> largest values in the DeferredSeries, sorted in decreasing order.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">keep=&quot;all&quot;</span></code> and <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> are supported. Other values of
<code class="docutils literal notranslate"><span class="pre">keep</span></code> make this an order-sensitive operation. Note <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> is
a Beam-specific option that guarantees only one duplicate will be kept, but
unlike <code class="docutils literal notranslate"><span class="pre">&quot;first&quot;</span></code> and <code class="docutils literal notranslate"><span class="pre">&quot;last&quot;</span></code> it makes no guarantees about <em>which</em>
duplicate element is kept.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.nsmallest" title="apache_beam.dataframe.frames.DeferredSeries.nsmallest"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.nsmallest()</span></code></a></dt>
<dd>Get the <cite>n</cite> smallest elements.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sort_values" title="apache_beam.dataframe.frames.DeferredSeries.sort_values"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sort_values()</span></code></a></dt>
<dd>Sort DeferredSeries by values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.head" title="apache_beam.dataframe.frames.DeferredSeries.head"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.head()</span></code></a></dt>
<dd>Return the first <cite>n</cite> rows.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Faster than <code class="docutils literal notranslate"><span class="pre">.sort_values(ascending=False).head(n)</span></code> for small <cite>n</cite>
relative to the size of the <code class="docutils literal notranslate"><span class="pre">DeferredSeries</span></code> object.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">countries_population</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;Italy&quot;</span><span class="p">:</span> <span class="mi">59000000</span><span class="p">,</span> <span class="s2">&quot;France&quot;</span><span class="p">:</span> <span class="mi">65000000</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;Malta&quot;</span><span class="p">:</span> <span class="mi">434000</span><span class="p">,</span> <span class="s2">&quot;Maldives&quot;</span><span class="p">:</span> <span class="mi">434000</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;Brunei&quot;</span><span class="p">:</span> <span class="mi">434000</span><span class="p">,</span> <span class="s2">&quot;Iceland&quot;</span><span class="p">:</span> <span class="mi">337000</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;Nauru&quot;</span><span class="p">:</span> <span class="mi">11300</span><span class="p">,</span> <span class="s2">&quot;Tuvalu&quot;</span><span class="p">:</span> <span class="mi">11300</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;Anguilla&quot;</span><span class="p">:</span> <span class="mi">11300</span><span class="p">,</span> <span class="s2">&quot;Montserrat&quot;</span><span class="p">:</span> <span class="mi">5200</span><span class="p">}</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">countries_population</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">Italy 59000000</span>
<span class="go">France 65000000</span>
<span class="go">Malta 434000</span>
<span class="go">Maldives 434000</span>
<span class="go">Brunei 434000</span>
<span class="go">Iceland 337000</span>
<span class="go">Nauru 11300</span>
<span class="go">Tuvalu 11300</span>
<span class="go">Anguilla 11300</span>
<span class="go">Montserrat 5200</span>
<span class="go">dtype: int64</span>
<span class="go">The `n` largest elements where ``n=5`` by default.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">nlargest</span><span class="p">()</span>
<span class="go">France 65000000</span>
<span class="go">Italy 59000000</span>
<span class="go">Malta 434000</span>
<span class="go">Maldives 434000</span>
<span class="go">Brunei 434000</span>
<span class="go">dtype: int64</span>
<span class="go">The `n` largest elements where ``n=3``. Default `keep` value is &#39;first&#39;</span>
<span class="go">so Malta will be kept.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">nlargest</span><span class="p">(</span><span class="mi">3</span><span class="p">)</span>
<span class="go">France 65000000</span>
<span class="go">Italy 59000000</span>
<span class="go">Malta 434000</span>
<span class="go">dtype: int64</span>
<span class="go">The `n` largest elements where ``n=3`` and keeping the last duplicates.</span>
<span class="go">Brunei will be kept since it is the last with value 434000 based on</span>
<span class="go">the index order.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">nlargest</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="n">keep</span><span class="o">=</span><span class="s1">&#39;last&#39;</span><span class="p">)</span>
<span class="go">France 65000000</span>
<span class="go">Italy 59000000</span>
<span class="go">Brunei 434000</span>
<span class="go">dtype: int64</span>
<span class="go">The `n` largest elements where ``n=3`` with all duplicates kept. Note</span>
<span class="go">that the returned Series has five elements due to the three duplicates.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">nlargest</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="n">keep</span><span class="o">=</span><span class="s1">&#39;all&#39;</span><span class="p">)</span>
<span class="go">France 65000000</span>
<span class="go">Italy 59000000</span>
<span class="go">Malta 434000</span>
<span class="go">Maldives 434000</span>
<span class="go">Brunei 434000</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.nsmallest">
<code class="descname">nsmallest</code><span class="sig-paren">(</span><em>keep</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.nsmallest"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.nsmallest" title="Permalink to this definition"></a></dt>
<dd><p>Return the smallest <cite>n</cite> elements.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>n</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 5</em>) – Return this many ascending sorted values.</li>
<li><strong>keep</strong> (<em>{'first'</em><em>, </em><em>'last'</em><em>, </em><em>'all'}</em><em>, </em><em>default 'first'</em>) – <p>When there are duplicate values that cannot all fit in a
DeferredSeries of <cite>n</cite> elements:</p>
<ul>
<li><dl class="first docutils">
<dt><code class="docutils literal notranslate"><span class="pre">first</span></code> <span class="classifier-delimiter">:</span> <span class="classifier">return the first <cite>n</cite> occurrences in order</span></dt>
<dd>of appearance.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt><code class="docutils literal notranslate"><span class="pre">last</span></code> <span class="classifier-delimiter">:</span> <span class="classifier">return the last <cite>n</cite> occurrences in reverse</span></dt>
<dd>order of appearance.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt><code class="docutils literal notranslate"><span class="pre">all</span></code> <span class="classifier-delimiter">:</span> <span class="classifier">keep all occurrences. This can result in a DeferredSeries of</span></dt>
<dd>size larger than <cite>n</cite>.</dd>
</dl>
</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The <cite>n</cite> smallest values in the DeferredSeries, sorted in increasing order.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">keep=&quot;all&quot;</span></code> and <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> are supported. Other values of
<code class="docutils literal notranslate"><span class="pre">keep</span></code> make this an order-sensitive operation. Note <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> is
a Beam-specific option that guarantees only one duplicate will be kept, but
unlike <code class="docutils literal notranslate"><span class="pre">&quot;first&quot;</span></code> and <code class="docutils literal notranslate"><span class="pre">&quot;last&quot;</span></code> it makes no guarantees about <em>which</em>
duplicate element is kept.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.nlargest" title="apache_beam.dataframe.frames.DeferredSeries.nlargest"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.nlargest()</span></code></a></dt>
<dd>Get the <cite>n</cite> largest elements.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sort_values" title="apache_beam.dataframe.frames.DeferredSeries.sort_values"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sort_values()</span></code></a></dt>
<dd>Sort DeferredSeries by values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.head" title="apache_beam.dataframe.frames.DeferredSeries.head"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.head()</span></code></a></dt>
<dd>Return the first <cite>n</cite> rows.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Faster than <code class="docutils literal notranslate"><span class="pre">.sort_values().head(n)</span></code> for small <cite>n</cite> relative to
the size of the <code class="docutils literal notranslate"><span class="pre">DeferredSeries</span></code> object.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">countries_population</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;Italy&quot;</span><span class="p">:</span> <span class="mi">59000000</span><span class="p">,</span> <span class="s2">&quot;France&quot;</span><span class="p">:</span> <span class="mi">65000000</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;Brunei&quot;</span><span class="p">:</span> <span class="mi">434000</span><span class="p">,</span> <span class="s2">&quot;Malta&quot;</span><span class="p">:</span> <span class="mi">434000</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;Maldives&quot;</span><span class="p">:</span> <span class="mi">434000</span><span class="p">,</span> <span class="s2">&quot;Iceland&quot;</span><span class="p">:</span> <span class="mi">337000</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;Nauru&quot;</span><span class="p">:</span> <span class="mi">11300</span><span class="p">,</span> <span class="s2">&quot;Tuvalu&quot;</span><span class="p">:</span> <span class="mi">11300</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;Anguilla&quot;</span><span class="p">:</span> <span class="mi">11300</span><span class="p">,</span> <span class="s2">&quot;Montserrat&quot;</span><span class="p">:</span> <span class="mi">5200</span><span class="p">}</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">countries_population</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">Italy 59000000</span>
<span class="go">France 65000000</span>
<span class="go">Brunei 434000</span>
<span class="go">Malta 434000</span>
<span class="go">Maldives 434000</span>
<span class="go">Iceland 337000</span>
<span class="go">Nauru 11300</span>
<span class="go">Tuvalu 11300</span>
<span class="go">Anguilla 11300</span>
<span class="go">Montserrat 5200</span>
<span class="go">dtype: int64</span>
<span class="go">The `n` smallest elements where ``n=5`` by default.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">nsmallest</span><span class="p">()</span>
<span class="go">Montserrat 5200</span>
<span class="go">Nauru 11300</span>
<span class="go">Tuvalu 11300</span>
<span class="go">Anguilla 11300</span>
<span class="go">Iceland 337000</span>
<span class="go">dtype: int64</span>
<span class="go">The `n` smallest elements where ``n=3``. Default `keep` value is</span>
<span class="go">&#39;first&#39; so Nauru and Tuvalu will be kept.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">nsmallest</span><span class="p">(</span><span class="mi">3</span><span class="p">)</span>
<span class="go">Montserrat 5200</span>
<span class="go">Nauru 11300</span>
<span class="go">Tuvalu 11300</span>
<span class="go">dtype: int64</span>
<span class="go">The `n` smallest elements where ``n=3`` and keeping the last</span>
<span class="go">duplicates. Anguilla and Tuvalu will be kept since they are the last</span>
<span class="go">with value 11300 based on the index order.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">nsmallest</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="n">keep</span><span class="o">=</span><span class="s1">&#39;last&#39;</span><span class="p">)</span>
<span class="go">Montserrat 5200</span>
<span class="go">Anguilla 11300</span>
<span class="go">Tuvalu 11300</span>
<span class="go">dtype: int64</span>
<span class="go">The `n` smallest elements where ``n=3`` with all duplicates kept. Note</span>
<span class="go">that the returned Series has four elements due to the three duplicates.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">nsmallest</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="n">keep</span><span class="o">=</span><span class="s1">&#39;all&#39;</span><span class="p">)</span>
<span class="go">Montserrat 5200</span>
<span class="go">Nauru 11300</span>
<span class="go">Tuvalu 11300</span>
<span class="go">Anguilla 11300</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.is_unique">
<code class="descname">is_unique</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.is_unique" title="Permalink to this definition"></a></dt>
<dd><p>Return boolean if values in the object are unique.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body"></td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)">bool</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.plot">
<code class="descname">plot</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.plot" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.plot.html#pandas.Series.plot" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.plot()</span></code></a> is not yet supported in the Beam DataFrame API because it is a plotting tool.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-plotting-tools">https://s.apache.org/dataframe-plotting-tools</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.pop">
<code class="descname">pop</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.pop" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.pop.html#pandas.Series.pop" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.pop()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.rename_axis">
<code class="descname">rename_axis</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.rename_axis" title="Permalink to this definition"></a></dt>
<dd><p>Set the name of the axis for the index or columns.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>mapper</strong> (<em>scalar</em><em>, </em><em>list-like</em><em>, </em><em>optional</em>) – Value to set the axis name attribute.</li>
<li><strong>index</strong>, <strong>columns</strong> (<em>scalar</em><em>, </em><em>list-like</em><em>, </em><em>dict-like</em><em> or </em><em>function</em><em>, </em><em>optional</em>) – <p>A scalar, list-like, dict-like or function transformation to
apply to that axis’ values.
Note that the <code class="docutils literal notranslate"><span class="pre">columns</span></code> parameter is not allowed if the
object is a DeferredSeries. This parameter only applies to DeferredDataFrame
type objects.</p>
<p>Use either <code class="docutils literal notranslate"><span class="pre">mapper</span></code> and <code class="docutils literal notranslate"><span class="pre">axis</span></code> to
specify the axis to target with <code class="docutils literal notranslate"><span class="pre">mapper</span></code>, or <code class="docutils literal notranslate"><span class="pre">index</span></code>
and/or <code class="docutils literal notranslate"><span class="pre">columns</span></code>.</p>
</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – The axis to rename.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Also copy underlying data.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Modifies the object directly, instead of creating a new DeferredSeries
or DeferredDataFrame.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The same type as the caller or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a>, <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a>, or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rename" title="apache_beam.dataframe.frames.DeferredSeries.rename"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rename()</span></code></a></dt>
<dd>Alter DeferredSeries index labels or name.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.rename" title="apache_beam.dataframe.frames.DeferredDataFrame.rename"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.rename()</span></code></a></dt>
<dd>Alter DeferredDataFrame index labels or name.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">Index.rename()</span></code></dt>
<dd>Set new names on index.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p><code class="docutils literal notranslate"><span class="pre">DeferredDataFrame.rename_axis</span></code> supports two calling conventions:</p>
<ul class="simple">
<li><code class="docutils literal notranslate"><span class="pre">(index=index_mapper,</span> <span class="pre">columns=columns_mapper,</span> <span class="pre">...)</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">(mapper,</span> <span class="pre">axis={'index',</span> <span class="pre">'columns'},</span> <span class="pre">...)</span></code></li>
</ul>
<p>The first calling convention will only modify the names of
the index and/or the names of the Index object that is the columns.
In this case, the parameter <code class="docutils literal notranslate"><span class="pre">copy</span></code> is ignored.</p>
<p>The second calling convention will modify the names of the
corresponding index if mapper is a list or a scalar.
However, if mapper is dict-like or a function, it will use the
deprecated behavior of modifying the axis <em>labels</em>.</p>
<p>We <em>highly</em> recommend using keyword arguments to clarify your
intent.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">**Series**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s2">&quot;dog&quot;</span><span class="p">,</span> <span class="s2">&quot;cat&quot;</span><span class="p">,</span> <span class="s2">&quot;monkey&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 dog</span>
<span class="go">1 cat</span>
<span class="go">2 monkey</span>
<span class="go">dtype: object</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">(</span><span class="s2">&quot;animal&quot;</span><span class="p">)</span>
<span class="go">animal</span>
<span class="go">0 dog</span>
<span class="go">1 cat</span>
<span class="go">2 monkey</span>
<span class="go">dtype: object</span>
<span class="go">**DataFrame**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;num_legs&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;num_arms&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">]},</span>
<span class="gp">... </span> <span class="p">[</span><span class="s2">&quot;dog&quot;</span><span class="p">,</span> <span class="s2">&quot;cat&quot;</span><span class="p">,</span> <span class="s2">&quot;monkey&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> num_legs num_arms</span>
<span class="go">dog 4 0</span>
<span class="go">cat 4 0</span>
<span class="go">monkey 2 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">(</span><span class="s2">&quot;animal&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> num_legs num_arms</span>
<span class="go">animal</span>
<span class="go">dog 4 0</span>
<span class="go">cat 4 0</span>
<span class="go">monkey 2 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">(</span><span class="s2">&quot;limbs&quot;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="s2">&quot;columns&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go">limbs num_legs num_arms</span>
<span class="go">animal</span>
<span class="go">dog 4 0</span>
<span class="go">cat 4 0</span>
<span class="go">monkey 2 2</span>
<span class="go">**MultiIndex**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_product</span><span class="p">([[</span><span class="s1">&#39;mammal&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="s1">&#39;cat&#39;</span><span class="p">,</span> <span class="s1">&#39;monkey&#39;</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;type&#39;</span><span class="p">,</span> <span class="s1">&#39;name&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go">limbs num_legs num_arms</span>
<span class="go">type name</span>
<span class="go">mammal dog 4 0</span>
<span class="go"> cat 4 0</span>
<span class="go"> monkey 2 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;type&#39;</span><span class="p">:</span> <span class="s1">&#39;class&#39;</span><span class="p">})</span>
<span class="go">limbs num_legs num_arms</span>
<span class="go">class name</span>
<span class="go">mammal dog 4 0</span>
<span class="go"> cat 4 0</span>
<span class="go"> monkey 2 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="nb">str</span><span class="o">.</span><span class="n">upper</span><span class="p">)</span>
<span class="go">LIMBS num_legs num_arms</span>
<span class="go">type name</span>
<span class="go">mammal dog 4 0</span>
<span class="go"> cat 4 0</span>
<span class="go"> monkey 2 2</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.round">
<code class="descname">round</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.round" title="Permalink to this definition"></a></dt>
<dd><p>Round each value in a Series to the given number of decimals.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>decimals</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 0</em>) – Number of decimal places to round to. If decimals is negative,
it specifies the number of positions to the left of the decimal point.</li>
<li><strong>*args</strong>, <strong>**kwargs</strong> – <p>Additional arguments and keywords have no effect but might be
accepted for compatibility with NumPy.</p>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Rounded values of the DeferredSeries.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.around()</span></code></dt>
<dd>Round values of an np.array.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.round" title="apache_beam.dataframe.frames.DeferredDataFrame.round"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.round()</span></code></a></dt>
<dd>Round values of a DeferredDataFrame.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mf">0.1</span><span class="p">,</span> <span class="mf">1.3</span><span class="p">,</span> <span class="mf">2.7</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">round</span><span class="p">()</span>
<span class="go">0 0.0</span>
<span class="go">1 1.0</span>
<span class="go">2 3.0</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.take">
<code class="descname">take</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.take" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.take.html#pandas.Series.take" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.take()</span></code></a> is not yet supported in the Beam DataFrame API because it is deprecated in pandas.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_dict">
<code class="descname">to_dict</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_dict" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.to_dict.html#pandas.Series.to_dict" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.to_dict()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_frame">
<code class="descname">to_frame</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_frame" title="Permalink to this definition"></a></dt>
<dd><p>Convert Series to DataFrame.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>name</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><em>object</em></a><em>, </em><em>default None</em>) – The passed name should substitute for the series name (if it has
one).</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">DeferredDataFrame representation of DeferredSeries.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="s2">&quot;c&quot;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;vals&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">to_frame</span><span class="p">()</span>
<span class="go"> vals</span>
<span class="go">0 a</span>
<span class="go">1 b</span>
<span class="go">2 c</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.unique">
<code class="descname">unique</code><span class="sig-paren">(</span><em>as_series=False</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.unique"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.unique" title="Permalink to this definition"></a></dt>
<dd><p>Return unique values of Series object.</p>
<p>Uniques are returned in order of appearance. This is a hash table-based unique,
therefore it does NOT sort.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">The unique values returned as a NumPy array. See Notes.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">ndarray or ExtensionArray</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>unique is not supported by default because it produces a
non-deferred result: an <a class="reference external" href="https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html#numpy.ndarray" title="(in NumPy v1.22)"><code class="xref py py-class docutils literal notranslate"><span class="pre">ndarray</span></code></a>. You can use the
Beam-specific argument <code class="docutils literal notranslate"><span class="pre">unique(as_series=True)</span></code> to get the result as
a <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredSeries</span></code></a>.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.unique.html#pandas.unique" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-func docutils literal notranslate"><span class="pre">pandas.unique()</span></code></a></dt>
<dd>Top-level unique method for any 1-d array-like object.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">Index.unique()</span></code></dt>
<dd>Return Index with unique values from an Index object.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Returns the unique values as a NumPy array. In case of an
extension-array backed DeferredSeries, a new
<code class="xref py py-class docutils literal notranslate"><span class="pre">ExtensionArray</span></code> of that type with just
the unique values is returned. This includes:</p>
<blockquote>
<div><ul class="simple">
<li>Categorical</li>
<li>Period</li>
<li>Datetime with Timezone</li>
<li>Interval</li>
<li>Sparse</li>
<li>IntegerNA</li>
</ul>
</div></blockquote>
<p>See Examples section.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="n">name</span><span class="o">=</span><span class="s1">&#39;A&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span>
<span class="go">array([2, 1, 3])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;2016-01-01&#39;</span><span class="p">)</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">3</span><span class="p">)])</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span>
<span class="go">array([&#39;2016-01-01T00:00:00.000000000&#39;], dtype=&#39;datetime64[ns]&#39;)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;2016-01-01&#39;</span><span class="p">,</span> <span class="n">tz</span><span class="o">=</span><span class="s1">&#39;US/Eastern&#39;</span><span class="p">)</span>
<span class="gp">... </span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">3</span><span class="p">)])</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span>
<span class="go">&lt;DatetimeArray&gt;</span>
<span class="go">[&#39;2016-01-01 00:00:00-05:00&#39;]</span>
<span class="go">Length: 1, dtype: datetime64[ns, US/Eastern]</span>
<span class="go">A Categorical will return categories in the order of</span>
<span class="go">appearance and with the same dtype.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Categorical</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="s1">&#39;baabc&#39;</span><span class="p">)))</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span>
<span class="go">[&#39;b&#39;, &#39;a&#39;, &#39;c&#39;]</span>
<span class="go">Categories (3, object): [&#39;a&#39;, &#39;b&#39;, &#39;c&#39;]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Categorical</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="s1">&#39;baabc&#39;</span><span class="p">),</span> <span class="n">categories</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="s1">&#39;abc&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">ordered</span><span class="o">=</span><span class="kc">True</span><span class="p">))</span><span class="o">.</span><span class="n">unique</span><span class="p">()</span>
<span class="go">[&#39;b&#39;, &#39;a&#39;, &#39;c&#39;]</span>
<span class="go">Categories (3, object): [&#39;a&#39; &lt; &#39;b&#39; &lt; &#39;c&#39;]</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.update">
<code class="descname">update</code><span class="sig-paren">(</span><em>other</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.update"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.update" title="Permalink to this definition"></a></dt>
<dd><p>Modify Series in place using values from passed Series.</p>
<p>Uses non-NA values from passed Series to make updates. Aligns
on index.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><em>object coercible into DeferredSeries</em>) – </td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 4</span>
<span class="go">1 5</span>
<span class="go">2 6</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 d</span>
<span class="go">1 b</span>
<span class="go">2 e</span>
<span class="go">dtype: object</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 4</span>
<span class="go">1 5</span>
<span class="go">2 6</span>
<span class="go">dtype: int64</span>
<span class="go">If ``other`` contains NaNs the corresponding values are not updated</span>
<span class="go">in the original Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">6</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 4</span>
<span class="go">1 2</span>
<span class="go">2 6</span>
<span class="go">dtype: int64</span>
<span class="go">``other`` can also be a non-Series object type</span>
<span class="go">that is coercible into a Series</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">update</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">6</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 4</span>
<span class="go">1 2</span>
<span class="go">2 6</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">update</span><span class="p">({</span><span class="mi">1</span><span class="p">:</span> <span class="mi">9</span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 1</span>
<span class="go">1 9</span>
<span class="go">2 3</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.unstack">
<code class="descname">unstack</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.unstack" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.unstack.html#pandas.Series.unstack" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.unstack()</span></code></a> is not yet supported in the Beam DataFrame API because the columns in the output DataFrame depend on the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-columns">https://s.apache.org/dataframe-non-deferred-columns</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.value_counts">
<code class="descname">value_counts</code><span class="sig-paren">(</span><em>sort=False</em>, <em>normalize=False</em>, <em>ascending=False</em>, <em>bins=None</em>, <em>dropna=True</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.value_counts"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.value_counts" title="Permalink to this definition"></a></dt>
<dd><p>Return a Series containing counts of unique values.</p>
<p>The resulting object will be in descending order so that the
first element is the most frequently-occurring element.
Excludes NA values by default.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>normalize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True then the object returned will contain the relative
frequencies of the unique values.</li>
<li><strong>sort</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Sort by frequencies.</li>
<li><strong>ascending</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Sort in ascending order.</li>
<li><strong>bins</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – Rather than count values, group them into half-open bins,
a convenience for <code class="docutils literal notranslate"><span class="pre">pd.cut</span></code>, only works with numeric data.</li>
<li><strong>dropna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Don’t include counts of NaN.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">sort</span></code> is <code class="docutils literal notranslate"><span class="pre">False</span></code> by default, and <code class="docutils literal notranslate"><span class="pre">sort=True</span></code> is not supported
because it imposes an ordering on the dataset which likely will not be
preserved.</p>
<p>When <code class="docutils literal notranslate"><span class="pre">bins</span></code> is specified this operation is not parallelizable. See
<a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-12441">BEAM-12441</a>, which tracks the
possible addition of a distributed implementation.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.count" title="apache_beam.dataframe.frames.DeferredSeries.count"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.count()</span></code></a></dt>
<dd>Number of non-NA elements in a DeferredSeries.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.count" title="apache_beam.dataframe.frames.DeferredDataFrame.count"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.count()</span></code></a></dt>
<dd>Number of non-NA elements in a DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.value_counts" title="apache_beam.dataframe.frames.DeferredDataFrame.value_counts"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.value_counts()</span></code></a></dt>
<dd>Equivalent method on DeferredDataFrames.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">index</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Index</span><span class="p">([</span><span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">index</span><span class="o">.</span><span class="n">value_counts</span><span class="p">()</span>
<span class="go">3.0 2</span>
<span class="go">1.0 1</span>
<span class="go">2.0 1</span>
<span class="go">4.0 1</span>
<span class="go">dtype: int64</span>
<span class="go">With `normalize` set to `True`, returns the relative frequency by</span>
<span class="go">dividing all values by the sum of values.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">value_counts</span><span class="p">(</span><span class="n">normalize</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go">3.0 0.4</span>
<span class="go">1.0 0.2</span>
<span class="go">2.0 0.2</span>
<span class="go">4.0 0.2</span>
<span class="go">dtype: float64</span>
<span class="go">**bins**</span>
<span class="go">Bins can be useful for going from a continuous variable to a</span>
<span class="go">categorical variable; instead of counting unique</span>
<span class="go">occurrences of values, divide the index in the specified</span>
<span class="go">number of half-open bins.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">value_counts</span><span class="p">(</span><span class="n">bins</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
<span class="go">(0.996, 2.0] 2</span>
<span class="go">(2.0, 3.0] 2</span>
<span class="go">(3.0, 4.0] 1</span>
<span class="go">dtype: int64</span>
<span class="go">**dropna**</span>
<span class="go">With `dropna` set to `False` we can also see NaN index values.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">value_counts</span><span class="p">(</span><span class="n">dropna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">3.0 2</span>
<span class="go">1.0 1</span>
<span class="go">2.0 1</span>
<span class="go">4.0 1</span>
<span class="go">NaN 1</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.values">
<code class="descname">values</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.values" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.values()</span></code> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.view">
<code class="descname">view</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.view" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.view.html#pandas.Series.view" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.view()</span></code></a> is not yet supported in the Beam DataFrame API because it relies on memory-sharing semantics that are not compatible with the Beam model.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.str">
<code class="descname">str</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.str" title="Permalink to this definition"></a></dt>
<dd><p>Vectorized string functions for Series and Index.</p>
<p>NAs stay NA unless handled otherwise by a particular method.
Patterned after Python’s string methods, with some inspiration from
R’s stringr package.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s2">&quot;A_Str_Series&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 A_Str_Series</span>
<span class="go">dtype: object</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">str</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">)</span>
<span class="go">0 [A, Str, Series]</span>
<span class="go">dtype: object</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">str</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;_&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
<span class="go">0 AStrSeries</span>
<span class="go">dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.cat">
<code class="descname">cat</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.cat" title="Permalink to this definition"></a></dt>
<dd><p>Accessor object for categorical properties of the Series values.</p>
<p>Be aware that assigning to <cite>categories</cite> is an inplace operation, while all
methods return new categorical data by default (but can be called with
<cite>inplace=True</cite>).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>data</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>CategoricalIndex</em>) – </td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="s2">&quot;abbccc&quot;</span><span class="p">))</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s2">&quot;category&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 a</span>
<span class="go">1 b</span>
<span class="go">2 b</span>
<span class="go">3 c</span>
<span class="go">4 c</span>
<span class="go">5 c</span>
<span class="go">dtype: category</span>
<span class="go">Categories (3, object): [&#39;a&#39;, &#39;b&#39;, &#39;c&#39;]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">cat</span><span class="o">.</span><span class="n">categories</span>
<span class="go">Index([&#39;a&#39;, &#39;b&#39;, &#39;c&#39;], dtype=&#39;object&#39;)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">cat</span><span class="o">.</span><span class="n">rename_categories</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="s2">&quot;cba&quot;</span><span class="p">))</span>
<span class="go">0 c</span>
<span class="go">1 b</span>
<span class="go">2 b</span>
<span class="go">3 a</span>
<span class="go">4 a</span>
<span class="go">5 a</span>
<span class="go">dtype: category</span>
<span class="go">Categories (3, object): [&#39;c&#39;, &#39;b&#39;, &#39;a&#39;]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">cat</span><span class="o">.</span><span class="n">reorder_categories</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="s2">&quot;cba&quot;</span><span class="p">))</span>
<span class="go">0 a</span>
<span class="go">1 b</span>
<span class="go">2 b</span>
<span class="go">3 c</span>
<span class="go">4 c</span>
<span class="go">5 c</span>
<span class="go">dtype: category</span>
<span class="go">Categories (3, object): [&#39;c&#39;, &#39;b&#39;, &#39;a&#39;]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">cat</span><span class="o">.</span><span class="n">add_categories</span><span class="p">([</span><span class="s2">&quot;d&quot;</span><span class="p">,</span> <span class="s2">&quot;e&quot;</span><span class="p">])</span>
<span class="go">0 a</span>
<span class="go">1 b</span>
<span class="go">2 b</span>
<span class="go">3 c</span>
<span class="go">4 c</span>
<span class="go">5 c</span>
<span class="go">dtype: category</span>
<span class="go">Categories (5, object): [&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;, &#39;e&#39;]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">cat</span><span class="o">.</span><span class="n">remove_categories</span><span class="p">([</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;c&quot;</span><span class="p">])</span>
<span class="go">0 NaN</span>
<span class="go">1 b</span>
<span class="go">2 b</span>
<span class="go">3 NaN</span>
<span class="go">4 NaN</span>
<span class="go">5 NaN</span>
<span class="go">dtype: category</span>
<span class="go">Categories (1, object): [&#39;b&#39;]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">cat</span><span class="o">.</span><span class="n">add_categories</span><span class="p">([</span><span class="s2">&quot;d&quot;</span><span class="p">,</span> <span class="s2">&quot;e&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span><span class="o">.</span><span class="n">cat</span><span class="o">.</span><span class="n">remove_unused_categories</span><span class="p">()</span>
<span class="go">0 a</span>
<span class="go">1 b</span>
<span class="go">2 b</span>
<span class="go">3 c</span>
<span class="go">4 c</span>
<span class="go">5 c</span>
<span class="go">dtype: category</span>
<span class="go">Categories (3, object): [&#39;a&#39;, &#39;b&#39;, &#39;c&#39;]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">cat</span><span class="o">.</span><span class="n">set_categories</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="s2">&quot;abcde&quot;</span><span class="p">))</span>
<span class="go">0 a</span>
<span class="go">1 b</span>
<span class="go">2 b</span>
<span class="go">3 c</span>
<span class="go">4 c</span>
<span class="go">5 c</span>
<span class="go">dtype: category</span>
<span class="go">Categories (5, object): [&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;, &#39;e&#39;]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">cat</span><span class="o">.</span><span class="n">as_ordered</span><span class="p">()</span>
<span class="go">0 a</span>
<span class="go">1 b</span>
<span class="go">2 b</span>
<span class="go">3 c</span>
<span class="go">4 c</span>
<span class="go">5 c</span>
<span class="go">dtype: category</span>
<span class="go">Categories (3, object): [&#39;a&#39; &lt; &#39;b&#39; &lt; &#39;c&#39;]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">cat</span><span class="o">.</span><span class="n">as_unordered</span><span class="p">()</span>
<span class="go">0 a</span>
<span class="go">1 b</span>
<span class="go">2 b</span>
<span class="go">3 c</span>
<span class="go">4 c</span>
<span class="go">5 c</span>
<span class="go">dtype: category</span>
<span class="go">Categories (3, object): [&#39;a&#39;, &#39;b&#39;, &#39;c&#39;]</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.dt">
<code class="descname">dt</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.dt" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.mode">
<code class="descname">mode</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.mode"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.mode" title="Permalink to this definition"></a></dt>
<dd><p>Return the mode(s) of the Series.</p>
<p>The mode is the value that appears most often. There can be multiple modes.</p>
<p>Always returns Series even if only one value is returned.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>dropna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Don’t consider counts of NaN/NaT.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">Modes of the DeferredSeries in sorted order.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>mode is not currently parallelizable. An approximate,
parallelizable implementation of mode may be added in the future
(<a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-12181">BEAM-12181</a>).</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.apply">
<code class="descname">apply</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.apply" title="Permalink to this definition"></a></dt>
<dd><p>Invoke function on values of Series.</p>
<p>Can be ufunc (a NumPy function that applies to the entire Series)
or a Python function that only works on single values.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>func</strong> (<em>function</em>) – Python function or NumPy ufunc to apply.</li>
<li><strong>convert_dtype</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Try to find better dtype for elementwise function results. If
False, leave as dtype=object. Note that the dtype is always
preserved for some extension array dtypes, such as Categorical.</li>
<li><strong>args</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.10)"><em>tuple</em></a>) – Positional arguments passed to func after the series value.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments passed to func.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If func returns a DeferredSeries object the result will be a DeferredDataFrame.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.map" title="apache_beam.dataframe.frames.DeferredSeries.map"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.map()</span></code></a></dt>
<dd>For element-wise operations.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.agg" title="apache_beam.dataframe.frames.DeferredSeries.agg"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.agg()</span></code></a></dt>
<dd>Only perform aggregating type operations.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.transform" title="apache_beam.dataframe.frames.DeferredSeries.transform"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.transform()</span></code></a></dt>
<dd>Only perform transforming type operations.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Functions that mutate the passed object can produce unexpected
behavior or errors and are not supported. See <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/gotchas.html#gotchas-udf-mutation" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span>Mutating with User Defined Function (UDF) methods</span></a>
for more details.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Create a series with typical summer temperatures for each city.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">20</span><span class="p">,</span> <span class="mi">21</span><span class="p">,</span> <span class="mi">12</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;London&#39;</span><span class="p">,</span> <span class="s1">&#39;New York&#39;</span><span class="p">,</span> <span class="s1">&#39;Helsinki&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">London 20</span>
<span class="go">New York 21</span>
<span class="go">Helsinki 12</span>
<span class="go">dtype: int64</span>
<span class="go">Square the values by defining a function and passing it as an</span>
<span class="go">argument to ``apply()``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="k">def</span> <span class="nf">square</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
<span class="gp">... </span> <span class="k">return</span> <span class="n">x</span> <span class="o">**</span> <span class="mi">2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="n">square</span><span class="p">)</span>
<span class="go">London 400</span>
<span class="go">New York 441</span>
<span class="go">Helsinki 144</span>
<span class="go">dtype: int64</span>
<span class="go">Square the values by passing an anonymous function as an</span>
<span class="go">argument to ``apply()``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span> <span class="o">**</span> <span class="mi">2</span><span class="p">)</span>
<span class="go">London 400</span>
<span class="go">New York 441</span>
<span class="go">Helsinki 144</span>
<span class="go">dtype: int64</span>
<span class="go">Define a custom function that needs additional positional</span>
<span class="go">arguments and pass these additional arguments using the</span>
<span class="go">``args`` keyword.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="k">def</span> <span class="nf">subtract_custom_value</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">custom_value</span><span class="p">):</span>
<span class="gp">... </span> <span class="k">return</span> <span class="n">x</span> <span class="o">-</span> <span class="n">custom_value</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="n">subtract_custom_value</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="p">(</span><span class="mi">5</span><span class="p">,))</span>
<span class="go">London 15</span>
<span class="go">New York 16</span>
<span class="go">Helsinki 7</span>
<span class="go">dtype: int64</span>
<span class="go">Define a custom function that takes keyword arguments</span>
<span class="go">and pass these arguments to ``apply``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="k">def</span> <span class="nf">add_custom_values</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="o">**</span><span class="n">kwargs</span><span class="p">):</span>
<span class="gp">... </span> <span class="k">for</span> <span class="n">month</span> <span class="ow">in</span> <span class="n">kwargs</span><span class="p">:</span>
<span class="gp">... </span> <span class="n">x</span> <span class="o">+=</span> <span class="n">kwargs</span><span class="p">[</span><span class="n">month</span><span class="p">]</span>
<span class="gp">... </span> <span class="k">return</span> <span class="n">x</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="n">add_custom_values</span><span class="p">,</span> <span class="n">june</span><span class="o">=</span><span class="mi">30</span><span class="p">,</span> <span class="n">july</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">august</span><span class="o">=</span><span class="mi">25</span><span class="p">)</span>
<span class="go">London 95</span>
<span class="go">New York 96</span>
<span class="go">Helsinki 87</span>
<span class="go">dtype: int64</span>
<span class="go">Use a function from the NumPy library.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">log</span><span class="p">)</span>
<span class="go">London 2.995732</span>
<span class="go">New York 3.044522</span>
<span class="go">Helsinki 2.484907</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.map">
<code class="descname">map</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.map" title="Permalink to this definition"></a></dt>
<dd><p>Map values of Series according to input correspondence.</p>
<p>Used for substituting each value in a Series with another value,
that may be derived from a function, a <code class="docutils literal notranslate"><span class="pre">dict</span></code> or
a <code class="xref py py-class docutils literal notranslate"><span class="pre">Series</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>arg</strong> (<em>function</em><em>, </em><em>collections.abc.Mapping subclass</em><em> or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a>) – Mapping correspondence.</li>
<li><strong>na_action</strong> (<em>{None</em><em>, </em><em>'ignore'}</em><em>, </em><em>default None</em>) – If ‘ignore’, propagate NaN values, without passing them to the
mapping correspondence.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Same index as caller.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.apply" title="apache_beam.dataframe.frames.DeferredSeries.apply"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.apply()</span></code></a></dt>
<dd>For applying more complex functions on a DeferredSeries.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.apply" title="apache_beam.dataframe.frames.DeferredDataFrame.apply"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.apply()</span></code></a></dt>
<dd>Apply a function row-/column-wise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.applymap" title="apache_beam.dataframe.frames.DeferredDataFrame.applymap"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.applymap()</span></code></a></dt>
<dd>Apply a function elementwise on a whole DeferredDataFrame.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>When <code class="docutils literal notranslate"><span class="pre">arg</span></code> is a dictionary, values in DeferredSeries that are not in the
dictionary (as keys) are converted to <code class="docutils literal notranslate"><span class="pre">NaN</span></code>. However, if the
dictionary is a <code class="docutils literal notranslate"><span class="pre">dict</span></code> subclass that defines <code class="docutils literal notranslate"><span class="pre">__missing__</span></code> (i.e.
provides a method for default values), then this default is used
rather than <code class="docutils literal notranslate"><span class="pre">NaN</span></code>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s1">&#39;cat&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="s1">&#39;rabbit&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 cat</span>
<span class="go">1 dog</span>
<span class="go">2 NaN</span>
<span class="go">3 rabbit</span>
<span class="go">dtype: object</span>
<span class="go">``map`` accepts a ``dict`` or a ``Series``. Values that are not found</span>
<span class="go">in the ``dict`` are converted to ``NaN``, unless the dict has a default</span>
<span class="go">value (e.g. ``defaultdict``):</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">map</span><span class="p">({</span><span class="s1">&#39;cat&#39;</span><span class="p">:</span> <span class="s1">&#39;kitten&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">:</span> <span class="s1">&#39;puppy&#39;</span><span class="p">})</span>
<span class="go">0 kitten</span>
<span class="go">1 puppy</span>
<span class="go">2 NaN</span>
<span class="go">3 NaN</span>
<span class="go">dtype: object</span>
<span class="go">It also accepts a function:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="s1">&#39;I am a </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">)</span>
<span class="go">0 I am a cat</span>
<span class="go">1 I am a dog</span>
<span class="go">2 I am a nan</span>
<span class="go">3 I am a rabbit</span>
<span class="go">dtype: object</span>
<span class="go">To avoid applying the function to missing values (and keep them as</span>
<span class="go">``NaN``) ``na_action=&#39;ignore&#39;`` can be used:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="s1">&#39;I am a </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">,</span> <span class="n">na_action</span><span class="o">=</span><span class="s1">&#39;ignore&#39;</span><span class="p">)</span>
<span class="go">0 I am a cat</span>
<span class="go">1 I am a dog</span>
<span class="go">2 NaN</span>
<span class="go">3 I am a rabbit</span>
<span class="go">dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.repeat">
<code class="descname">repeat</code><span class="sig-paren">(</span><em>repeats</em>, <em>axis</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.repeat"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.repeat" title="Permalink to this definition"></a></dt>
<dd><p>Repeat elements of a Series.</p>
<p>Returns a new Series where each element of the current Series
is repeated consecutively a given number of times.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>repeats</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>array of ints</em>) – The number of repetitions for each element. This should be a
non-negative integer. Repeating 0 times will return an empty
DeferredSeries.</li>
<li><strong>axis</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a>) – Must be <code class="docutils literal notranslate"><span class="pre">None</span></code>. Has no effect but is accepted for compatibility
with numpy.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Newly created DeferredSeries with repeated elements.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">repeats</span></code> must be an <code class="docutils literal notranslate"><span class="pre">int</span></code> or a <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredSeries</span></code></a>. Lists are
not supported because they make this operation order-sensitive.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">Index.repeat()</span></code></dt>
<dd>Equivalent function for Index.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.repeat()</span></code></dt>
<dd>Similar method for <a class="reference external" href="https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html#numpy.ndarray" title="(in NumPy v1.22)"><code class="xref py py-class docutils literal notranslate"><span class="pre">numpy.ndarray</span></code></a>.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 a</span>
<span class="go">1 b</span>
<span class="go">2 c</span>
<span class="go">dtype: object</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">repeat</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
<span class="go">0 a</span>
<span class="go">0 a</span>
<span class="go">1 b</span>
<span class="go">1 b</span>
<span class="go">2 c</span>
<span class="go">2 c</span>
<span class="go">dtype: object</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">repeat</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="go">0 a</span>
<span class="go">1 b</span>
<span class="go">1 b</span>
<span class="go">2 c</span>
<span class="go">2 c</span>
<span class="go">2 c</span>
<span class="go">dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.compare">
<code class="descname">compare</code><span class="sig-paren">(</span><em>other</em>, <em>align_axis</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredSeries.compare"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.compare" title="Permalink to this definition"></a></dt>
<dd><p>Compare to another Series and show the differences.</p>
<div class="versionadded">
<p><span class="versionmodified">New in pandas version 1.1.0.</span></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a>) – Object to compare with.</li>
<li><strong>align_axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 1</em>) – <p>Determine which axis to align the comparison on.</p>
<ul>
<li><dl class="first docutils">
<dt>0, or ‘index’ <span class="classifier-delimiter">:</span> <span class="classifier">Resulting differences are stacked vertically</span></dt>
<dd>with rows drawn alternately from self and other.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>1, or ‘columns’ <span class="classifier-delimiter">:</span> <span class="classifier">Resulting differences are aligned horizontally</span></dt>
<dd>with columns drawn alternately from self and other.</dd>
</dl>
</li>
</ul>
</li>
<li><strong>keep_shape</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If true, all rows and columns are kept.
Otherwise, only the ones with different values are kept.</li>
<li><strong>keep_equal</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If true, the result keeps values that are equal.
Otherwise, equal values are shown as NaNs.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If <code class="docutils literal notranslate"><span class="pre">align_axis</span></code> is 0 or ‘index’ the result will be a DeferredSeries.
The resulting index will be a MultiIndex with ‘self’ and ‘other’
stacked alternately at the inner level.</p>
<p>If <code class="docutils literal notranslate"><span class="pre">align_axis</span></code> is 1 or ‘columns’ the result will be a DeferredDataFrame.
It will have two columns, namely ‘self’ and ‘other’.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.compare" title="apache_beam.dataframe.frames.DeferredDataFrame.compare"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.compare()</span></code></a></dt>
<dd>Compare with another DeferredDataFrame and show differences.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Matching NaNs will not appear as a difference.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="s2">&quot;c&quot;</span><span class="p">,</span> <span class="s2">&quot;d&quot;</span><span class="p">,</span> <span class="s2">&quot;e&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;c&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="s2">&quot;e&quot;</span><span class="p">])</span>
<span class="go">Align the differences on columns</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span><span class="o">.</span><span class="n">compare</span><span class="p">(</span><span class="n">s2</span><span class="p">)</span>
<span class="go"> self other</span>
<span class="go">1 b a</span>
<span class="go">3 d b</span>
<span class="go">Stack the differences on indices</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span><span class="o">.</span><span class="n">compare</span><span class="p">(</span><span class="n">s2</span><span class="p">,</span> <span class="n">align_axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">1 self b</span>
<span class="go"> other a</span>
<span class="go">3 self d</span>
<span class="go"> other b</span>
<span class="go">dtype: object</span>
<span class="go">Keep all original rows</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span><span class="o">.</span><span class="n">compare</span><span class="p">(</span><span class="n">s2</span><span class="p">,</span> <span class="n">keep_shape</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> self other</span>
<span class="go">0 NaN NaN</span>
<span class="go">1 b a</span>
<span class="go">2 NaN NaN</span>
<span class="go">3 d b</span>
<span class="go">4 NaN NaN</span>
<span class="go">Keep all original rows and also all original values</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span><span class="o">.</span><span class="n">compare</span><span class="p">(</span><span class="n">s2</span><span class="p">,</span> <span class="n">keep_shape</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">keep_equal</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> self other</span>
<span class="go">0 a a</span>
<span class="go">1 b a</span>
<span class="go">2 c c</span>
<span class="go">3 d b</span>
<span class="go">4 e e</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.abs">
<code class="descname">abs</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.abs" title="Permalink to this definition"></a></dt>
<dd><p>Return a Series/DataFrame with absolute numeric value of each element.</p>
<p>This function only applies to elements that are all numeric.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">DeferredSeries/DeferredDataFrame containing the absolute value of each element.</td>
</tr>
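<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>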
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.absolute()</span></code></dt>
<dd>Calculate the absolute value element-wise.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>For <code class="docutils literal notranslate"><span class="pre">complex</span></code> inputs of the form <code class="docutils literal notranslate"><span class="pre">a</span> <span class="pre">+</span> <span class="pre">bj</span></code> (for example <code class="docutils literal notranslate"><span class="pre">1.2</span> <span class="pre">+</span> <span class="pre">1j</span></code>), the absolute value is
<span class="math notranslate nohighlight">\(\sqrt{ a^2 + b^2 }\)</span>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Absolute numeric values in a Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="o">-</span><span class="mf">1.10</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="o">-</span><span class="mf">3.33</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">abs</span><span class="p">()</span>
<span class="go">0 1.10</span>
<span class="go">1 2.00</span>
<span class="go">2 3.33</span>
<span class="go">3 4.00</span>
<span class="go">dtype: float64</span>
<span class="go">Absolute numeric values in a Series with complex numbers.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mf">1.2</span> <span class="o">+</span> <span class="mi">1</span><span class="n">j</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">abs</span><span class="p">()</span>
<span class="go">0 1.56205</span>
<span class="go">dtype: float64</span>
<span class="go">Absolute numeric values in a Series with a Timedelta element.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">pd</span><span class="o">.</span><span class="n">Timedelta</span><span class="p">(</span><span class="s1">&#39;1 days&#39;</span><span class="p">)])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">abs</span><span class="p">()</span>
<span class="go">0 1 days</span>
<span class="go">dtype: timedelta64[ns]</span>
<span class="go">Select rows with data closest to certain value using argsort (from</span>
<span class="go">`StackOverflow &lt;https://stackoverflow.com/a/17758115&gt;`__).</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span>
<span class="gp">... </span> <span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">10</span><span class="p">,</span> <span class="mi">20</span><span class="p">,</span> <span class="mi">30</span><span class="p">,</span> <span class="mi">40</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">50</span><span class="p">,</span> <span class="o">-</span><span class="mi">30</span><span class="p">,</span> <span class="o">-</span><span class="mi">50</span><span class="p">]</span>
<span class="gp">... </span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> a b c</span>
<span class="go">0 4 10 100</span>
<span class="go">1 5 20 50</span>
<span class="go">2 6 30 -30</span>
<span class="go">3 7 40 -50</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[(</span><span class="n">df</span><span class="o">.</span><span class="n">c</span> <span class="o">-</span> <span class="mi">43</span><span class="p">)</span><span class="o">.</span><span class="n">abs</span><span class="p">()</span><span class="o">.</span><span class="n">argsort</span><span class="p">()]</span>
<span class="go"> a b c</span>
<span class="go">1 5 20 50</span>
<span class="go">0 4 10 100</span>
<span class="go">2 6 30 -30</span>
<span class="go">3 7 40 -50</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.add">
<code class="descname">add</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.add" title="Permalink to this definition"></a></dt>
<dd><p>Return the element-wise addition of series and other (binary operator <cite>add</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">+</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.radd" title="apache_beam.dataframe.frames.DeferredSeries.radd"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.radd()</span></code></a></dt>
<dd>Reverse of the Addition operator, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 2.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.asfreq">
<code class="descname">asfreq</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.asfreq" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.asfreq.html#pandas.Series.asfreq" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.asfreq()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘asfreq’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.astype">
<code class="descname">astype</code><span class="sig-paren">(</span><em>dtype</em>, <em>copy</em>, <em>errors</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.astype" title="Permalink to this definition"></a></dt>
<dd><p>Cast a pandas object to a specified dtype <code class="docutils literal notranslate"><span class="pre">dtype</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>dtype</strong> (<em>data type</em><em>, or </em><em>dict of column name -&gt; data type</em>) – Use a numpy.dtype or Python type to cast entire pandas object to
the same type. Alternatively, use {col: dtype, …}, where col is a
column label and dtype is a numpy.dtype or Python type to cast one
or more of the DeferredDataFrame’s columns to column-specific types.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Return a copy when <code class="docutils literal notranslate"><span class="pre">copy=True</span></code> (be very careful setting
<code class="docutils literal notranslate"><span class="pre">copy=False</span></code> as changes to values then may propagate to other
pandas objects).</li>
<li><strong>errors</strong> (<em>{'raise'</em><em>, </em><em>'ignore'}</em><em>, </em><em>default 'raise'</em>) – <p>Control raising of exceptions on invalid data for provided dtype.</p>
<ul>
<li><code class="docutils literal notranslate"><span class="pre">raise</span></code> : allow exceptions to be raised</li>
<li><code class="docutils literal notranslate"><span class="pre">ignore</span></code> : suppress exceptions. On error return original object.</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><strong>casted</strong></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">same type as caller</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>astype is not parallelizable when <code class="docutils literal notranslate"><span class="pre">errors=&quot;ignore&quot;</span></code> is specified.</p>
<p><code class="docutils literal notranslate"><span class="pre">copy=False</span></code> is not supported because it relies on memory-sharing
semantics.</p>
<p><code class="docutils literal notranslate"><span class="pre">dtype=&quot;category&quot;</span></code> is not supported because the type of the output column
depends on the data. Please use <code class="docutils literal notranslate"><span class="pre">pd.CategoricalDtype</span></code> with explicit
categories instead.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_datetime()</span></code></dt>
<dd>Convert argument to datetime.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_timedelta()</span></code></dt>
<dd>Convert argument to timedelta.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_numeric()</span></code></dt>
<dd>Convert argument to a numeric type.</dd>
<dt><a class="reference external" href="https://numpy.org/doc/stable/reference/generated/numpy.ndarray.astype.html#numpy.ndarray.astype" title="(in NumPy v1.22)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.ndarray.astype()</span></code></a></dt>
<dd>Cast a numpy array to a specified type.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<div class="deprecated">
<p><span class="versionmodified">Deprecated since version 1.3.0: </span>Using <code class="docutils literal notranslate"><span class="pre">astype</span></code> to convert from timezone-naive dtype to
timezone-aware dtype is deprecated and will raise in a
future version. Use <code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.dt.tz_localize()</span></code> instead.</p>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Create a DataFrame:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">d</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]}</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="n">d</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dtypes</span>
<span class="go">col1 int64</span>
<span class="go">col2 int64</span>
<span class="go">dtype: object</span>
<span class="go">Cast all columns to int32:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s1">&#39;int32&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">dtypes</span>
<span class="go">col1 int32</span>
<span class="go">col2 int32</span>
<span class="go">dtype: object</span>
<span class="go">Cast col1 to int32 using a dictionary:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">astype</span><span class="p">({</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="s1">&#39;int32&#39;</span><span class="p">})</span><span class="o">.</span><span class="n">dtypes</span>
<span class="go">col1 int32</span>
<span class="go">col2 int64</span>
<span class="go">dtype: object</span>
<span class="go">Create a series:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="s1">&#39;int32&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">dtype: int32</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s1">&#39;int64&#39;</span><span class="p">)</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">dtype: int64</span>
<span class="go">Convert to categorical type:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s1">&#39;category&#39;</span><span class="p">)</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">dtype: category</span>
<span class="go">Categories (2, int64): [1, 2]</span>
<span class="go">Convert to ordered categorical type with custom ordering:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pandas.api.types</span> <span class="kn">import</span> <span class="n">CategoricalDtype</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">cat_dtype</span> <span class="o">=</span> <span class="n">CategoricalDtype</span><span class="p">(</span>
<span class="gp">... </span> <span class="n">categories</span><span class="o">=</span><span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">ordered</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">cat_dtype</span><span class="p">)</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">dtype: category</span>
<span class="go">Categories (2, int64): [2 &lt; 1]</span>
<span class="go">Note that using ``copy=False`` and changing data on a new</span>
<span class="go">pandas object may propagate changes:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s2</span> <span class="o">=</span> <span class="n">s1</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s1">&#39;int64&#39;</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s2</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="mi">10</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span> <span class="c1"># note that s1[0] has changed too</span>
<span class="go">0 10</span>
<span class="go">1 2</span>
<span class="go">dtype: int64</span>
<span class="go">Create a series of dates:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser_date</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s1">&#39;20200101&#39;</span><span class="p">,</span> <span class="n">periods</span><span class="o">=</span><span class="mi">3</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser_date</span>
<span class="go">0 2020-01-01</span>
<span class="go">1 2020-01-02</span>
<span class="go">2 2020-01-03</span>
<span class="go">dtype: datetime64[ns]</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.at">
<code class="descname">at</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.at" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.at()</span></code> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘at’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.at_time">
<code class="descname">at_time</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.at_time" title="Permalink to this definition"></a></dt>
<dd><p>Select values at a particular time of day (e.g., 9:30 AM).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>time</strong> (<a class="reference external" href="https://docs.python.org/3/library/datetime.html#datetime.time" title="(in Python v3.10)"><em>datetime.time</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – </li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – </li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> – If the index is not a <code class="xref py py-class docutils literal notranslate"><span class="pre">DatetimeIndex</span></code></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.between_time" title="apache_beam.dataframe.frames.DeferredSeries.between_time"><code class="xref py py-meth docutils literal notranslate"><span class="pre">between_time()</span></code></a></dt>
<dd>Select values between particular times of the day.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.first" title="apache_beam.dataframe.frames.DeferredSeries.first"><code class="xref py py-meth docutils literal notranslate"><span class="pre">first()</span></code></a></dt>
<dd>Select initial periods of time series based on a date offset.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.last" title="apache_beam.dataframe.frames.DeferredSeries.last"><code class="xref py py-meth docutils literal notranslate"><span class="pre">last()</span></code></a></dt>
<dd>Select final periods of time series based on a date offset.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DatetimeIndex.indexer_at_time()</span></code></dt>
<dd>Get just the index locations for values at particular time of the day.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">i</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s1">&#39;2018-04-09&#39;</span><span class="p">,</span> <span class="n">periods</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;12H&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span> <span class="n">index</span><span class="o">=</span><span class="n">i</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span>
<span class="go"> A</span>
<span class="go">2018-04-09 00:00:00 1</span>
<span class="go">2018-04-09 12:00:00 2</span>
<span class="go">2018-04-10 00:00:00 3</span>
<span class="go">2018-04-10 12:00:00 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span><span class="o">.</span><span class="n">at_time</span><span class="p">(</span><span class="s1">&#39;12:00&#39;</span><span class="p">)</span>
<span class="go"> A</span>
<span class="go">2018-04-09 12:00:00 2</span>
<span class="go">2018-04-10 12:00:00 4</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.attrs">
<code class="descname">attrs</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.attrs" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.attrs()</span></code> is not yet supported in the Beam DataFrame API because it is experimental in pandas.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.backfill">
<code class="descname">backfill</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.backfill" title="Permalink to this definition"></a></dt>
<dd><p>Synonym for <code class="xref py py-meth docutils literal notranslate"><span class="pre">DataFrame.fillna()</span></code> with <code class="docutils literal notranslate"><span class="pre">method='bfill'</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Object with missing values filled or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">DeferredSeries/DeferredDataFrame or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>backfill is only supported for axis="columns". axis="index" is not supported because it is order-sensitive.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.between_time">
<code class="descname">between_time</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.between_time" title="Permalink to this definition"></a></dt>
<dd><p>Select values between particular times of the day (e.g., 9:00-9:30 AM).</p>
<p>By setting <code class="docutils literal notranslate"><span class="pre">start_time</span></code> to be later than <code class="docutils literal notranslate"><span class="pre">end_time</span></code>,
you can get the times that are <em>not</em> between the two times.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>start_time</strong> (<a class="reference external" href="https://docs.python.org/3/library/datetime.html#datetime.time" title="(in Python v3.10)"><em>datetime.time</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – Initial time as a time filter limit.</li>
<li><strong>end_time</strong> (<a class="reference external" href="https://docs.python.org/3/library/datetime.html#datetime.time" title="(in Python v3.10)"><em>datetime.time</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – End time as a time filter limit.</li>
<li><strong>include_start</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Whether the start time needs to be included in the result.</li>
<li><strong>include_end</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Whether the end time needs to be included in the result.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – Determine range time on index or columns value.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Data from the original object filtered to the specified dates range.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> – If the index is not a <code class="xref py py-class docutils literal notranslate"><span class="pre">DatetimeIndex</span></code></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.at_time" title="apache_beam.dataframe.frames.DeferredSeries.at_time"><code class="xref py py-meth docutils literal notranslate"><span class="pre">at_time()</span></code></a></dt>
<dd>Select values at a particular time of the day.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.first" title="apache_beam.dataframe.frames.DeferredSeries.first"><code class="xref py py-meth docutils literal notranslate"><span class="pre">first()</span></code></a></dt>
<dd>Select initial periods of time series based on a date offset.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.last" title="apache_beam.dataframe.frames.DeferredSeries.last"><code class="xref py py-meth docutils literal notranslate"><span class="pre">last()</span></code></a></dt>
<dd>Select final periods of time series based on a date offset.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DatetimeIndex.indexer_between_time()</span></code></dt>
<dd>Get just the index locations for values between particular times of the day.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">i</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s1">&#39;2018-04-09&#39;</span><span class="p">,</span> <span class="n">periods</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;1D20min&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span> <span class="n">index</span><span class="o">=</span><span class="n">i</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span>
<span class="go"> A</span>
<span class="go">2018-04-09 00:00:00 1</span>
<span class="go">2018-04-10 00:20:00 2</span>
<span class="go">2018-04-11 00:40:00 3</span>
<span class="go">2018-04-12 01:00:00 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span><span class="o">.</span><span class="n">between_time</span><span class="p">(</span><span class="s1">&#39;0:15&#39;</span><span class="p">,</span> <span class="s1">&#39;0:45&#39;</span><span class="p">)</span>
<span class="go"> A</span>
<span class="go">2018-04-10 00:20:00 2</span>
<span class="go">2018-04-11 00:40:00 3</span>
<span class="go">You get the times that are *not* between two times by setting</span>
<span class="go">``start_time`` later than ``end_time``:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span><span class="o">.</span><span class="n">between_time</span><span class="p">(</span><span class="s1">&#39;0:45&#39;</span><span class="p">,</span> <span class="s1">&#39;0:15&#39;</span><span class="p">)</span>
<span class="go"> A</span>
<span class="go">2018-04-09 00:00:00 1</span>
<span class="go">2018-04-12 01:00:00 4</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.bfill">
<code class="descname">bfill</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.bfill" title="Permalink to this definition"></a></dt>
<dd><p>bfill is only supported for axis="columns". axis="index" is not supported because it is order-sensitive.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.bool">
<code class="descname">bool</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.bool" title="Permalink to this definition"></a></dt>
<dd><p>Return the bool of a single element Series or DataFrame.</p>
<p>This must be a boolean scalar value, either True or False. It will raise a
ValueError if the Series or DataFrame does not have exactly 1 element, or that
element is not boolean (integer values 0 and 1 will also raise an exception).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">The value in the DeferredSeries or DeferredDataFrame.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)">bool</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.astype" title="apache_beam.dataframe.frames.DeferredSeries.astype"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.astype()</span></code></a></dt>
<dd>Change the data type of a DeferredSeries, including to boolean.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.astype" title="apache_beam.dataframe.frames.DeferredDataFrame.astype"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.astype()</span></code></a></dt>
<dd>Change the data type of a DeferredDataFrame, including to boolean.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.bool_()</span></code></dt>
<dd>NumPy boolean data type, used by pandas for boolean values.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">The method will only work for single element objects with a boolean value:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">True</span><span class="p">])</span><span class="o">.</span><span class="n">bool</span><span class="p">()</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">False</span><span class="p">])</span><span class="o">.</span><span class="n">bool</span><span class="p">()</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;col&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">True</span><span class="p">]})</span><span class="o">.</span><span class="n">bool</span><span class="p">()</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;col&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">False</span><span class="p">]})</span><span class="o">.</span><span class="n">bool</span><span class="p">()</span>
<span class="go">False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.combine">
<code class="descname">combine</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.combine" title="Permalink to this definition"></a></dt>
<dd><p>Perform column-wise combine with another DataFrame.</p>
<p>Combines a DataFrame with <cite>other</cite> DataFrame using <cite>func</cite>
to element-wise combine columns. The row and column indexes of the
resulting DataFrame will be the union of the two.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – The DeferredDataFrame to merge column-wise.</li>
<li><strong>func</strong> (<em>function</em>) – Function that takes two series as inputs and returns a DeferredSeries or a
scalar. Used to merge the two dataframes column by column.</li>
<li><strong>fill_value</strong> (<em>scalar value</em><em>, </em><em>default None</em>) – The value to fill NaNs with prior to passing any column to the
merge func.</li>
<li><strong>overwrite</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – If True, columns in <cite>self</cite> that do not exist in <cite>other</cite> will be
overwritten with NaNs.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Combination of the provided DeferredDataFrames.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.combine_first" title="apache_beam.dataframe.frames.DeferredDataFrame.combine_first"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.combine_first()</span></code></a></dt>
<dd>Combine two DeferredDataFrame objects and default to non-null values in frame calling the method.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Combine using a simple function that chooses the smaller column.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">take_smaller</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">s1</span><span class="p">,</span> <span class="n">s2</span><span class="p">:</span> <span class="n">s1</span> <span class="k">if</span> <span class="n">s1</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">&lt;</span> <span class="n">s2</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="k">else</span> <span class="n">s2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 0 3</span>
<span class="go">1 0 3</span>
<span class="go">Example using a true element-wise combine function.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">minimum</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 1 2</span>
<span class="go">1 0 3</span>
<span class="go">Using `fill_value` fills Nones prior to passing the column to the</span>
<span class="go">merge function.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=-</span><span class="mi">5</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 0 -5.0</span>
<span class="go">1 0 4.0</span>
<span class="go">However, if the same element in both dataframes is None, that None</span>
<span class="go">is preserved</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mi">3</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=-</span><span class="mi">5</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 0 -5.0</span>
<span class="go">1 0 3.0</span>
<span class="go">Example that demonstrates the use of `overwrite` and behavior when</span>
<span class="go">the axes differ between the dataframes.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">[</span><span class="o">-</span><span class="mi">10</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">},</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 NaN NaN NaN</span>
<span class="go">1 NaN 3.0 -10.0</span>
<span class="go">2 NaN 3.0 1.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">,</span> <span class="n">overwrite</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 0.0 NaN NaN</span>
<span class="go">1 0.0 3.0 -10.0</span>
<span class="go">2 NaN 3.0 1.0</span>
<span class="go">Demonstrating the preference of the passed in dataframe.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">},</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df1</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 0.0 NaN NaN</span>
<span class="go">1 0.0 3.0 NaN</span>
<span class="go">2 NaN 3.0 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df1</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">,</span> <span class="n">overwrite</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 0.0 NaN NaN</span>
<span class="go">1 0.0 3.0 1.0</span>
<span class="go">2 NaN 3.0 1.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.combine_first">
<code class="descname">combine_first</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.combine_first" title="Permalink to this definition"></a></dt>
<dd><p>Update null elements with value in the same location in <cite>other</cite>.</p>
<p>Combine two DataFrame objects by filling null values in one DataFrame
with non-null values from the other DataFrame. The row and column indexes
of the resulting DataFrame will be the union of the two.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Provided DeferredDataFrame to use to fill null values.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">The result of combining the provided DeferredDataFrame with the other object.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.combine" title="apache_beam.dataframe.frames.DeferredDataFrame.combine"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.combine()</span></code></a></dt>
<dd>Perform series-wise operation on two DeferredDataFrames using a given function.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine_first</span><span class="p">(</span><span class="n">df2</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 1.0 3.0</span>
<span class="go">1 0.0 4.0</span>
<span class="go">Null values still persist if the location of that null value</span>
<span class="go">does not exist in `other`</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="kc">None</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]},</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine_first</span><span class="p">(</span><span class="n">df2</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 NaN 4.0 NaN</span>
<span class="go">1 0.0 3.0 1.0</span>
<span class="go">2 NaN 3.0 1.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.convert_dtypes">
<code class="descname">convert_dtypes</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.convert_dtypes" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.convert_dtypes.html#pandas.Series.convert_dtypes" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.convert_dtypes()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘convert_dtypes’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.copy">
<code class="descname">copy</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.copy" title="Permalink to this definition"></a></dt>
<dd><p>Make a copy of this object’s indices and data.</p>
<p>When <code class="docutils literal notranslate"><span class="pre">deep=True</span></code> (default), a new object will be created with a
copy of the calling object’s data and indices. Modifications to
the data or indices of the copy will not be reflected in the
original object (see notes below).</p>
<p>When <code class="docutils literal notranslate"><span class="pre">deep=False</span></code>, a new object will be created without copying
the calling object’s data or index (only references to the data
and index are copied). Any changes to the data of the original
will be reflected in the shallow copy (and vice versa).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>deep</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Make a deep copy, including a copy of the data and the indices.
With <code class="docutils literal notranslate"><span class="pre">deep=False</span></code> neither the indices nor the data are copied.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><strong>copy</strong> – Object type matches caller.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Notes</p>
<p>When <code class="docutils literal notranslate"><span class="pre">deep=True</span></code>, data is copied but actual Python objects
will not be copied recursively, only the reference to the object.
This is in contrast to <cite>copy.deepcopy</cite> in the Standard Library,
which recursively copies object data (see examples below).</p>
<p>While <code class="docutils literal notranslate"><span class="pre">Index</span></code> objects are copied when <code class="docutils literal notranslate"><span class="pre">deep=True</span></code>, the underlying
numpy array is not copied for performance reasons. Since <code class="docutils literal notranslate"><span class="pre">Index</span></code> is
immutable, the underlying data can be safely shared and a copy
is not needed.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">a 1</span>
<span class="go">b 2</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s_copy</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s_copy</span>
<span class="go">a 1</span>
<span class="go">b 2</span>
<span class="go">dtype: int64</span>
<span class="go">**Shallow copy versus default (deep) copy:**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">deep</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">shallow</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">deep</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">Shallow copy shares data and index with original.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="ow">is</span> <span class="n">shallow</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">values</span> <span class="ow">is</span> <span class="n">shallow</span><span class="o">.</span><span class="n">values</span> <span class="ow">and</span> <span class="n">s</span><span class="o">.</span><span class="n">index</span> <span class="ow">is</span> <span class="n">shallow</span><span class="o">.</span><span class="n">index</span>
<span class="go">True</span>
<span class="go">Deep copy has own copy of data and index.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="ow">is</span> <span class="n">deep</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">values</span> <span class="ow">is</span> <span class="n">deep</span><span class="o">.</span><span class="n">values</span> <span class="ow">or</span> <span class="n">s</span><span class="o">.</span><span class="n">index</span> <span class="ow">is</span> <span class="n">deep</span><span class="o">.</span><span class="n">index</span>
<span class="go">False</span>
<span class="go">Updates to the data shared by shallow copy and original are reflected</span>
<span class="go">in both; deep copy remains unchanged.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="mi">3</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">shallow</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="mi">4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">a 3</span>
<span class="go">b 4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">shallow</span>
<span class="go">a 3</span>
<span class="go">b 4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">deep</span>
<span class="go">a 1</span>
<span class="go">b 2</span>
<span class="go">dtype: int64</span>
<span class="go">Note that when copying an object containing Python objects, a deep copy</span>
<span class="go">will copy the data, but will not do so recursively. Updating a nested</span>
<span class="go">data object will be reflected in the deep copy.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">deep</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="mi">10</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 [10, 2]</span>
<span class="go">1 [3, 4]</span>
<span class="go">dtype: object</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">deep</span>
<span class="go">0 [10, 2]</span>
<span class="go">1 [3, 4]</span>
<span class="go">dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.div">
<code class="descname">div</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.div" title="Permalink to this definition"></a></dt>
<dd><p>Return floating division of series and other, element-wise (binary operator <cite>truediv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">/</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rtruediv" title="apache_beam.dataframe.frames.DeferredSeries.rtruediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rtruediv()</span></code></a></dt>
<dd>Reverse of the Floating division operator, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">divide</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 1.0</span>
<span class="go">b inf</span>
<span class="go">c inf</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.divide">
<code class="descname">divide</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.divide" title="Permalink to this definition"></a></dt>
<dd><p>Return Floating division of series and other, element-wise (binary operator <cite>truediv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">/</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rtruediv" title="apache_beam.dataframe.frames.DeferredSeries.rtruediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rtruediv()</span></code></a></dt>
<dd>Reverse of the Floating division operator, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">divide</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 1.0</span>
<span class="go">b inf</span>
<span class="go">c inf</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.divmod">
<code class="descname">divmod</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.divmod" title="Permalink to this definition"></a></dt>
<dd><p>Return Integer division and modulo of series and other, element-wise (binary operator <cite>divmod</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">divmod(series,</span> <span class="pre">other)</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">2-Tuple of DeferredSeries</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rdivmod" title="apache_beam.dataframe.frames.DeferredSeries.rdivmod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rdivmod()</span></code></a></dt>
<dd>Reverse of the Integer division and modulo operator, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">divmod</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">(a 1.0</span>
<span class="go"> b NaN</span>
<span class="go"> c NaN</span>
<span class="go"> d 0.0</span>
<span class="go"> e NaN</span>
<span class="go"> dtype: float64,</span>
<span class="go"> a 0.0</span>
<span class="go"> b NaN</span>
<span class="go"> c NaN</span>
<span class="go"> d 0.0</span>
<span class="go"> e NaN</span>
<span class="go"> dtype: float64)</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.drop">
<code class="descname">drop</code><span class="sig-paren">(</span><em>labels</em>, <em>axis</em>, <em>index</em>, <em>columns</em>, <em>errors</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.drop" title="Permalink to this definition"></a></dt>
<dd><p>Drop specified labels from rows or columns.</p>
<p>Remove rows or columns by specifying label names and corresponding
axis, or by specifying directly index or column names. When using a
multi-index, labels on different levels can be removed by specifying
the level. See the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/advanced.html#defined-levels">user guide</a>
for more information about the now unused levels.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>labels</strong> (<em>single label</em><em> or </em><em>list-like</em>) – Index or column labels to drop.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – Whether to drop labels from the index (0 or ‘index’) or
columns (1 or ‘columns’).</li>
<li><strong>index</strong> (<em>single label</em><em> or </em><em>list-like</em>) – Alternative to specifying axis (<code class="docutils literal notranslate"><span class="pre">labels,</span> <span class="pre">axis=0</span></code>
is equivalent to <code class="docutils literal notranslate"><span class="pre">index=labels</span></code>).</li>
<li><strong>columns</strong> (<em>single label</em><em> or </em><em>list-like</em>) – Alternative to specifying axis (<code class="docutils literal notranslate"><span class="pre">labels,</span> <span class="pre">axis=1</span></code>
is equivalent to <code class="docutils literal notranslate"><span class="pre">columns=labels</span></code>).</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>optional</em>) – For MultiIndex, level from which the labels will be removed.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If False, return a copy. Otherwise, do operation
inplace and return None.</li>
<li><strong>errors</strong> (<em>{'ignore'</em><em>, </em><em>'raise'}</em><em>, </em><em>default 'raise'</em>) – If ‘ignore’, suppress error and only existing labels are
dropped.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredDataFrame without the removed index or column labels or
None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#KeyError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">KeyError</span></code></a> – If any of the labels is not found in the selected axis.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>drop is not parallelizable when dropping from the index and
<code class="docutils literal notranslate"><span class="pre">errors=&quot;raise&quot;</span></code> is specified. It requires collecting all data on a single
node in order to detect if one of the index values is missing.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.loc" title="apache_beam.dataframe.frames.DeferredDataFrame.loc"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.loc()</span></code></a></dt>
<dd>Label-location based indexer for selection by label.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.dropna" title="apache_beam.dataframe.frames.DeferredDataFrame.dropna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.dropna()</span></code></a></dt>
<dd>Return DeferredDataFrame with labels on given axis omitted where (all or any) data are missing.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.drop_duplicates" title="apache_beam.dataframe.frames.DeferredDataFrame.drop_duplicates"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.drop_duplicates()</span></code></a></dt>
<dd>Return DeferredDataFrame with duplicate rows removed, optionally only considering certain columns.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.drop" title="apache_beam.dataframe.frames.DeferredSeries.drop"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.drop()</span></code></a></dt>
<dd>Return DeferredSeries with specified index labels removed.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">12</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;D&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B C D</span>
<span class="go">0 0 1 2 3</span>
<span class="go">1 4 5 6 7</span>
<span class="go">2 8 9 10 11</span>
</pre></div>
</div>
<p>Drop columns</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">([</span><span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> A D</span>
<span class="go">0 0 3</span>
<span class="go">1 4 7</span>
<span class="go">2 8 11</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">])</span>
<span class="go"> A D</span>
<span class="go">0 0 3</span>
<span class="go">1 4 7</span>
<span class="go">2 8 11</span>
</pre></div>
</div>
<p>Drop a row by index</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span>
<span class="go"> A B C D</span>
<span class="go">2 8 9 10 11</span>
</pre></div>
</div>
<p>Drop columns and/or rows of MultiIndex DataFrame</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">midx</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="p">(</span><span class="n">levels</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;lama&#39;</span><span class="p">,</span> <span class="s1">&#39;cow&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;speed&#39;</span><span class="p">,</span> <span class="s1">&#39;weight&#39;</span><span class="p">,</span> <span class="s1">&#39;length&#39;</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">codes</span><span class="o">=</span><span class="p">[[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="n">midx</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;big&#39;</span><span class="p">,</span> <span class="s1">&#39;small&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">data</span><span class="o">=</span><span class="p">[[</span><span class="mi">45</span><span class="p">,</span> <span class="mi">30</span><span class="p">],</span> <span class="p">[</span><span class="mi">200</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span> <span class="p">[</span><span class="mf">1.5</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">30</span><span class="p">,</span> <span class="mi">20</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">],</span> <span class="p">[</span><span class="mf">1.5</span><span class="p">,</span> <span class="mf">0.8</span><span class="p">],</span> <span class="p">[</span><span class="mi">320</span><span class="p">,</span> <span class="mi">250</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mf">0.8</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.3</span><span class="p">,</span> <span class="mf">0.2</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> big small</span>
<span class="go">lama speed 45.0 30.0</span>
<span class="go"> weight 200.0 100.0</span>
<span class="go"> length 1.5 1.0</span>
<span class="go">cow speed 30.0 20.0</span>
<span class="go"> weight 250.0 150.0</span>
<span class="go"> length 1.5 0.8</span>
<span class="go">falcon speed 320.0 250.0</span>
<span class="go"> weight 1.0 0.8</span>
<span class="go"> length 0.3 0.2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="s1">&#39;cow&#39;</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="s1">&#39;small&#39;</span><span class="p">)</span>
<span class="go"> big</span>
<span class="go">lama speed 45.0</span>
<span class="go"> weight 200.0</span>
<span class="go"> length 1.5</span>
<span class="go">falcon speed 320.0</span>
<span class="go"> weight 1.0</span>
<span class="go"> length 0.3</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="s1">&#39;length&#39;</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> big small</span>
<span class="go">lama speed 45.0 30.0</span>
<span class="go"> weight 200.0 100.0</span>
<span class="go">cow speed 30.0 20.0</span>
<span class="go"> weight 250.0 150.0</span>
<span class="go">falcon speed 320.0 250.0</span>
<span class="go"> weight 1.0 0.8</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.droplevel">
<code class="descname">droplevel</code><span class="sig-paren">(</span><em>level</em>, <em>axis</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.droplevel" title="Permalink to this definition"></a></dt>
<dd><p>Return Series/DataFrame with requested index / column level(s) removed.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, or </em><em>list-like</em>) – If a string is given, must be the name of a level.
If list-like, elements must be names or positional indexes
of levels.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – <p>Axis along which the level(s) is removed:</p>
<ul>
<li>0 or ‘index’: remove level(s) in column.</li>
<li>1 or ‘columns’: remove level(s) in row.</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredSeries/DeferredDataFrame with requested index / column level(s) removed.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">DeferredSeries/DeferredDataFrame</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">9</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="mi">12</span><span class="p">]</span>
<span class="gp">... </span><span class="p">])</span><span class="o">.</span><span class="n">set_index</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">([</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_tuples</span><span class="p">([</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;f&#39;</span><span class="p">)</span>
<span class="gp">... </span><span class="p">],</span> <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;level_1&#39;</span><span class="p">,</span> <span class="s1">&#39;level_2&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go">level_1 c d</span>
<span class="go">level_2 e f</span>
<span class="go">a b</span>
<span class="go">1 2 3 4</span>
<span class="go">5 6 7 8</span>
<span class="go">9 10 11 12</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">droplevel</span><span class="p">(</span><span class="s1">&#39;a&#39;</span><span class="p">)</span>
<span class="go">level_1 c d</span>
<span class="go">level_2 e f</span>
<span class="go">b</span>
<span class="go">2 3 4</span>
<span class="go">6 7 8</span>
<span class="go">10 11 12</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">droplevel</span><span class="p">(</span><span class="s1">&#39;level_2&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">level_1 c d</span>
<span class="go">a b</span>
<span class="go">1 2 3 4</span>
<span class="go">5 6 7 8</span>
<span class="go">9 10 11 12</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.empty">
<code class="descname">empty</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.empty" title="Permalink to this definition"></a></dt>
<dd><p>Indicator whether DataFrame is empty.</p>
<p>True if DataFrame is entirely empty (no items), meaning any of the
axes are of length 0.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">If DeferredDataFrame is empty, return True, if not return False.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)">bool</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.dropna" title="apache_beam.dataframe.frames.DeferredSeries.dropna"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredSeries.dropna</span></code></a></dt>
<dd>Return series without null values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.dropna" title="apache_beam.dataframe.frames.DeferredDataFrame.dropna"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.dropna</span></code></a></dt>
<dd>Return DeferredDataFrame with labels on given axis omitted where (all or any) data are missing.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>If DeferredDataFrame contains only NaNs, it is still not considered empty. See
the example below.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">An example of an actual empty DataFrame. Notice the index is empty:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_empty</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span> <span class="p">:</span> <span class="p">[]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_empty</span>
<span class="go">Empty DataFrame</span>
<span class="go">Columns: [A]</span>
<span class="go">Index: []</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_empty</span><span class="o">.</span><span class="n">empty</span>
<span class="go">True</span>
<span class="go">If we only have NaNs in our DataFrame, it is not considered empty! We</span>
<span class="go">will need to drop the NaNs to make the DataFrame empty:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span> <span class="p">:</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A</span>
<span class="go">0 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">empty</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">()</span><span class="o">.</span><span class="n">empty</span>
<span class="go">True</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.eq">
<code class="descname">eq</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.eq" title="Permalink to this definition"></a></dt>
<dd><p>Return Equal to of series and other, element-wise (binary operator <cite>eq</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">==</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">eq</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a True</span>
<span class="go">b False</span>
<span class="go">c False</span>
<span class="go">d False</span>
<span class="go">e False</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.equals">
<code class="descname">equals</code><span class="sig-paren">(</span><em>other</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.equals" title="Permalink to this definition"></a></dt>
<dd><p>Test whether two objects contain the same elements.</p>
<p>This function allows two Series or DataFrames to be compared against
each other to see if they have the same shape and elements. NaNs in
the same location are considered equal.</p>
<p>The row/column index do not need to have the same type, as long
as the values are considered equal. Corresponding columns must be of
the same dtype.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – The other DeferredSeries or DeferredDataFrame to be compared with the first.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">True if all elements are the same in both objects, False
otherwise.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)">bool</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.eq" title="apache_beam.dataframe.frames.DeferredSeries.eq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.eq()</span></code></a></dt>
<dd>Compare two DeferredSeries objects of the same length and return a DeferredSeries where each element is True if the element in each DeferredSeries is equal, False otherwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eq" title="apache_beam.dataframe.frames.DeferredDataFrame.eq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.eq()</span></code></a></dt>
<dd>Compare two DeferredDataFrame objects of the same shape and return a DeferredDataFrame where each element is True if the respective element in each DeferredDataFrame is equal, False otherwise.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">testing.assert_series_equal()</span></code></dt>
<dd>Raises an AssertionError if left and right are not equal. Provides an easy interface to ignore inequality in dtypes, indexes and precision among others.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">testing.assert_frame_equal()</span></code></dt>
<dd>Like assert_series_equal, but targets DeferredDataFrames.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.array_equal()</span></code></dt>
<dd>Return True if two arrays have the same shape and elements, False otherwise.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="mi">1</span><span class="p">:</span> <span class="p">[</span><span class="mi">10</span><span class="p">],</span> <span class="mi">2</span><span class="p">:</span> <span class="p">[</span><span class="mi">20</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> 1 2</span>
<span class="go">0 10 20</span>
<span class="go">DataFrames df and exactly_equal have the same types and values for</span>
<span class="go">their elements and column labels, which will return True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">exactly_equal</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="mi">1</span><span class="p">:</span> <span class="p">[</span><span class="mi">10</span><span class="p">],</span> <span class="mi">2</span><span class="p">:</span> <span class="p">[</span><span class="mi">20</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">exactly_equal</span>
<span class="go"> 1 2</span>
<span class="go">0 10 20</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">exactly_equal</span><span class="p">)</span>
<span class="go">True</span>
<span class="go">DataFrames df and different_column_type have the same element</span>
<span class="go">types and values, but have different types for the column labels,</span>
<span class="go">which will still return True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">different_column_type</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="mf">1.0</span><span class="p">:</span> <span class="p">[</span><span class="mi">10</span><span class="p">],</span> <span class="mf">2.0</span><span class="p">:</span> <span class="p">[</span><span class="mi">20</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">different_column_type</span>
<span class="go"> 1.0 2.0</span>
<span class="go">0 10 20</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">different_column_type</span><span class="p">)</span>
<span class="go">True</span>
<span class="go">DataFrames df and different_data_type have different types for the</span>
<span class="go">same values for their elements, and will return False even though</span>
<span class="go">their column labels are the same values and types.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">different_data_type</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="mi">1</span><span class="p">:</span> <span class="p">[</span><span class="mf">10.0</span><span class="p">],</span> <span class="mi">2</span><span class="p">:</span> <span class="p">[</span><span class="mf">20.0</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">different_data_type</span>
<span class="go"> 1 2</span>
<span class="go">0 10.0 20.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">different_data_type</span><span class="p">)</span>
<span class="go">False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.ewm">
<code class="descname">ewm</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.ewm" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.ewm.html#pandas.Series.ewm" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.ewm()</span></code></a> is not yet supported in the Beam DataFrame API because implementing it would require integrating with Beam event-time semantics.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-event-time-semantics">https://s.apache.org/dataframe-event-time-semantics</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.expanding">
<code class="descname">expanding</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.expanding" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.expanding.html#pandas.Series.expanding" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.expanding()</span></code></a> is not yet supported in the Beam DataFrame API because implementing it would require integrating with Beam event-time semantics.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-event-time-semantics">https://s.apache.org/dataframe-event-time-semantics</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.ffill">
<code class="descname">ffill</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.ffill" title="Permalink to this definition"></a></dt>
<dd><p>ffill is only supported for axis=”columns”. axis=”index” is order-sensitive.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.fillna">
<code class="descname">fillna</code><span class="sig-paren">(</span><em>value</em>, <em>method</em>, <em>axis</em>, <em>limit</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.fillna" title="Permalink to this definition"></a></dt>
<dd><p>Fill NA/NaN values using the specified method.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>value</strong> (<em>scalar</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Value to use to fill holes (e.g. 0), alternately a
dict/DeferredSeries/DeferredDataFrame of values specifying which value to use for
each index (for a DeferredSeries) or column (for a DeferredDataFrame). Values not
in the dict/DeferredSeries/DeferredDataFrame will not be filled. This value cannot
be a list.</li>
<li><strong>method</strong> (<em>{'backfill'</em><em>, </em><em>'bfill'</em><em>, </em><em>'pad'</em><em>, </em><em>'ffill'</em><em>, </em><em>None}</em><em>, </em><em>default None</em>) – Method to use for filling holes in reindexed DeferredSeries.
pad / ffill: propagate last valid observation forward to the next valid observation;
backfill / bfill: use next valid observation to fill gap.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Axis along which to fill missing values.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, fill in-place. Note: this will modify any
other views on this object (e.g., a no-copy slice for a column in a
DeferredDataFrame).</li>
<li><strong>limit</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default None</em>) – If method is specified, this is the maximum number of consecutive
NaN values to forward/backward fill. In other words, if there is
a gap with more than this number of consecutive NaNs, it will only
be partially filled. If method is not specified, this is the
maximum number of entries along the entire axis where NaNs will be
filled. Must be greater than 0 if not None.</li>
<li><strong>downcast</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>default is None</em>) – A dict of item-&gt;dtype of what to downcast if possible,
or the string ‘infer’ which will try to downcast to an appropriate
equal type (e.g. float64 to int64 if possible).</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Object with missing values filled or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>When <code class="docutils literal notranslate"><span class="pre">axis=&quot;index&quot;</span></code>, both <code class="docutils literal notranslate"><span class="pre">method</span></code> and <code class="docutils literal notranslate"><span class="pre">limit</span></code> must be <code class="docutils literal notranslate"><span class="pre">None</span></code>,
otherwise this operation is order-sensitive.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.interpolate" title="apache_beam.dataframe.frames.DeferredSeries.interpolate"><code class="xref py py-meth docutils literal notranslate"><span class="pre">interpolate()</span></code></a></dt>
<dd>Fill NaN values using interpolation.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.reindex" title="apache_beam.dataframe.frames.DeferredSeries.reindex"><code class="xref py py-meth docutils literal notranslate"><span class="pre">reindex()</span></code></a></dt>
<dd>Conform object to new index.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.asfreq" title="apache_beam.dataframe.frames.DeferredSeries.asfreq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">asfreq()</span></code></a></dt>
<dd>Convert TimeSeries to specified frequency.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">5</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">4</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="s2">&quot;ABCD&quot;</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B C D</span>
<span class="go">0 NaN 2.0 NaN 0</span>
<span class="go">1 3.0 4.0 NaN 1</span>
<span class="go">2 NaN NaN NaN 5</span>
<span class="go">3 NaN 3.0 NaN 4</span>
<span class="go">Replace all NaN elements with 0s.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> A B C D</span>
<span class="go">0 0.0 2.0 0.0 0</span>
<span class="go">1 3.0 4.0 0.0 1</span>
<span class="go">2 0.0 0.0 0.0 5</span>
<span class="go">3 0.0 3.0 0.0 4</span>
<span class="go">We can also propagate non-null values forward or backward.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="s2">&quot;ffill&quot;</span><span class="p">)</span>
<span class="go"> A B C D</span>
<span class="go">0 NaN 2.0 NaN 0</span>
<span class="go">1 3.0 4.0 NaN 1</span>
<span class="go">2 3.0 4.0 NaN 5</span>
<span class="go">3 3.0 3.0 NaN 4</span>
<span class="go">Replace all NaN elements in column &#39;A&#39;, &#39;B&#39;, &#39;C&#39;, and &#39;D&#39;, with 0, 1,</span>
<span class="go">2, and 3 respectively.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">values</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;A&quot;</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;B&quot;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;C&quot;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s2">&quot;D&quot;</span><span class="p">:</span> <span class="mi">3</span><span class="p">}</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">values</span><span class="p">)</span>
<span class="go"> A B C D</span>
<span class="go">0 0.0 2.0 2.0 0</span>
<span class="go">1 3.0 4.0 2.0 1</span>
<span class="go">2 0.0 1.0 2.0 5</span>
<span class="go">3 0.0 3.0 2.0 4</span>
<span class="go">Only replace the first NaN element.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">values</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> A B C D</span>
<span class="go">0 0.0 2.0 2.0 0</span>
<span class="go">1 3.0 4.0 NaN 1</span>
<span class="go">2 NaN 1.0 NaN 5</span>
<span class="go">3 NaN 3.0 NaN 4</span>
<span class="go">When filling using a DataFrame, replacement happens along</span>
<span class="go">the same column names and same indices</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">)),</span> <span class="n">columns</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="s2">&quot;ABCE&quot;</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">df2</span><span class="p">)</span>
<span class="go"> A B C D</span>
<span class="go">0 0.0 2.0 0.0 0</span>
<span class="go">1 3.0 4.0 0.0 1</span>
<span class="go">2 0.0 0.0 0.0 5</span>
<span class="go">3 0.0 3.0 0.0 4</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.first">
<code class="descname">first</code><span class="sig-paren">(</span><em>offset</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.first" title="Permalink to this definition"></a></dt>
<dd><p>Select initial periods of time series data based on a date offset.</p>
<p>When having a DataFrame with dates as index, this function can
select the first few rows based on a date offset.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>offset</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>DateOffset</em><em> or </em><em>dateutil.relativedelta</em>) – The offset length of the data that will be selected. For instance,
‘1M’ will display all the rows having their index within the first month.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A subset of the caller.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> – If the index is not a <code class="xref py py-class docutils literal notranslate"><span class="pre">DatetimeIndex</span></code></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.last" title="apache_beam.dataframe.frames.DeferredSeries.last"><code class="xref py py-meth docutils literal notranslate"><span class="pre">last()</span></code></a></dt>
<dd>Select final periods of time series based on a date offset.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.at_time" title="apache_beam.dataframe.frames.DeferredSeries.at_time"><code class="xref py py-meth docutils literal notranslate"><span class="pre">at_time()</span></code></a></dt>
<dd>Select values at a particular time of the day.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.between_time" title="apache_beam.dataframe.frames.DeferredSeries.between_time"><code class="xref py py-meth docutils literal notranslate"><span class="pre">between_time()</span></code></a></dt>
<dd>Select values between particular times of the day.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">i</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s1">&#39;2018-04-09&#39;</span><span class="p">,</span> <span class="n">periods</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;2D&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span> <span class="n">index</span><span class="o">=</span><span class="n">i</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span>
<span class="go"> A</span>
<span class="go">2018-04-09 1</span>
<span class="go">2018-04-11 2</span>
<span class="go">2018-04-13 3</span>
<span class="go">2018-04-15 4</span>
<span class="go">Get the rows for the first 3 days:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span><span class="o">.</span><span class="n">first</span><span class="p">(</span><span class="s1">&#39;3D&#39;</span><span class="p">)</span>
<span class="go"> A</span>
<span class="go">2018-04-09 1</span>
<span class="go">2018-04-11 2</span>
<span class="go">Notice the data for the first 3 calendar days were returned, not the first</span>
<span class="go">3 days observed in the dataset, and therefore data for 2018-04-13 was</span>
<span class="go">not returned.</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.flags">
<code class="descname">flags</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.flags" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.flags</span></code> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘flags’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.floordiv">
<code class="descname">floordiv</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.floordiv" title="Permalink to this definition"></a></dt>
<dd><p>Return Integer division of series and other, element-wise (binary operator <cite>floordiv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">//</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rfloordiv" title="apache_beam.dataframe.frames.DeferredSeries.rfloordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rfloordiv()</span></code></a></dt>
<dd>Reverse of the Integer division operator, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">floordiv</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">c NaN</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.ge">
<code class="descname">ge</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.ge" title="Permalink to this definition"></a></dt>
<dd><p>Return Greater than or equal to of series and other, element-wise (binary operator <cite>ge</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">&gt;=</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">e 1.0</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;f&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 0.0</span>
<span class="go">b 1.0</span>
<span class="go">c 2.0</span>
<span class="go">d NaN</span>
<span class="go">f 1.0</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">ge</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a True</span>
<span class="go">b True</span>
<span class="go">c False</span>
<span class="go">d False</span>
<span class="go">e True</span>
<span class="go">f False</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.groupby">
<code class="descname">groupby</code><span class="sig-paren">(</span><em>by</em>, <em>level</em>, <em>axis</em>, <em>as_index</em>, <em>group_keys</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.groupby" title="Permalink to this definition"></a></dt>
<dd><p>Group DataFrame using a mapper or by a Series of columns.</p>
<p>A groupby operation involves some combination of splitting the
object, applying a function, and combining the results. This can be
used to group large amounts of data and compute operations on these
groups.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>by</strong> (<em>mapping</em><em>, </em><em>function</em><em>, </em><em>label</em><em>, or </em><em>list of labels</em>) – Used to determine the groups for the groupby.
If <code class="docutils literal notranslate"><span class="pre">by</span></code> is a function, it’s called on each value of the object’s
index. If a dict or DeferredSeries is passed, the DeferredSeries or dict VALUES
will be used to determine the groups (the DeferredSeries’ values are first
aligned; see <code class="docutils literal notranslate"><span class="pre">.align()</span></code> method). If an ndarray is passed, the
values are used as-is to determine the groups. A label or list of
labels may be passed to group by the columns in <code class="docutils literal notranslate"><span class="pre">self</span></code>. Notice
that a tuple is interpreted as a (single) key.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – Split along rows (0) or columns (1).</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>level name</em><em>, or </em><em>sequence of such</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), group by a particular
level or levels.</li>
<li><strong>as_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – For aggregated output, return object with group labels as the
index. Only relevant for DeferredDataFrame input. as_index=False is
effectively “SQL-style” grouped output.</li>
<li><strong>sort</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Sort group keys. Get better performance by turning this off.
Note this does not influence the order of observations within each
group. Groupby preserves the order of rows within each group.</li>
<li><strong>group_keys</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – When calling apply, add group keys to index to identify pieces.</li>
<li><strong>squeeze</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>Reduce the dimensionality of the return type if possible,
otherwise return a consistent type.</p>
<div class="deprecated">
<p><span class="versionmodified">Deprecated since version 1.1.0.</span></p>
</div>
</li>
<li><strong>observed</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – This only applies if any of the groupers are Categoricals.
If True: only show observed values for categorical groupers.
If False: show all values for categorical groupers.</li>
<li><strong>dropna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – <p>If True, and if group keys contain NA values, NA values together
with row/column will be dropped.
If False, NA values will also be treated as the key in groups.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Returns a groupby object that contains information about the groups.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">DeferredDataFrameGroupBy</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">as_index</span></code> and <code class="docutils literal notranslate"><span class="pre">group_keys</span></code> must both be <code class="docutils literal notranslate"><span class="pre">True</span></code>.</p>
<p>Aggregations grouping by a categorical column with <code class="docutils literal notranslate"><span class="pre">observed=False</span></code> set
are not currently parallelizable
(<a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-11190">BEAM-11190</a>).</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.resample" title="apache_beam.dataframe.frames.DeferredSeries.resample"><code class="xref py py-meth docutils literal notranslate"><span class="pre">resample()</span></code></a></dt>
<dd>Convenience method for frequency conversion and resampling of time series.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>See the <a class="reference external" href="https://pandas.pydata.org/pandas-docs/stable/groupby.html">user guide</a> for more.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;Animal&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;Falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;Falcon&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;Parrot&#39;</span><span class="p">,</span> <span class="s1">&#39;Parrot&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;Max Speed&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mf">380.</span><span class="p">,</span> <span class="mf">370.</span><span class="p">,</span> <span class="mf">24.</span><span class="p">,</span> <span class="mf">26.</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> Animal Max Speed</span>
<span class="go">0 Falcon 380.0</span>
<span class="go">1 Falcon 370.0</span>
<span class="go">2 Parrot 24.0</span>
<span class="go">3 Parrot 26.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">([</span><span class="s1">&#39;Animal&#39;</span><span class="p">])</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="go"> Max Speed</span>
<span class="go">Animal</span>
<span class="go">Falcon 375.0</span>
<span class="go">Parrot 25.0</span>
<span class="go">**Hierarchical Indexes**</span>
<span class="go">We can groupby different levels of a hierarchical index</span>
<span class="go">using the `level` parameter:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">arrays</span> <span class="o">=</span> <span class="p">[[</span><span class="s1">&#39;Falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;Falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;Parrot&#39;</span><span class="p">,</span> <span class="s1">&#39;Parrot&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;Captive&#39;</span><span class="p">,</span> <span class="s1">&#39;Wild&#39;</span><span class="p">,</span> <span class="s1">&#39;Captive&#39;</span><span class="p">,</span> <span class="s1">&#39;Wild&#39;</span><span class="p">]]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">index</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">(</span><span class="n">arrays</span><span class="p">,</span> <span class="n">names</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;Animal&#39;</span><span class="p">,</span> <span class="s1">&#39;Type&#39;</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;Max Speed&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mf">390.</span><span class="p">,</span> <span class="mf">350.</span><span class="p">,</span> <span class="mf">30.</span><span class="p">,</span> <span class="mf">20.</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> Max Speed</span>
<span class="go">Animal Type</span>
<span class="go">Falcon Captive 390.0</span>
<span class="go"> Wild 350.0</span>
<span class="go">Parrot Captive 30.0</span>
<span class="go"> Wild 20.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="go"> Max Speed</span>
<span class="go">Animal</span>
<span class="go">Falcon 370.0</span>
<span class="go">Parrot 25.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="s2">&quot;Type&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="go"> Max Speed</span>
<span class="go">Type</span>
<span class="go">Captive 210.0</span>
<span class="go">Wild 185.0</span>
<span class="go">We can also choose to include NA in group keys or not by setting</span>
<span class="go">the `dropna` parameter; the default setting is `True`:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">l</span> <span class="o">=</span> <span class="p">[[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">]]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="s2">&quot;c&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;b&quot;</span><span class="p">])</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="go"> a c</span>
<span class="go">b</span>
<span class="go">1.0 2 3</span>
<span class="go">2.0 2 5</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;b&quot;</span><span class="p">],</span> <span class="n">dropna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="go"> a c</span>
<span class="go">b</span>
<span class="go">1.0 2 3</span>
<span class="go">2.0 2 5</span>
<span class="go">NaN 1 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">l</span> <span class="o">=</span> <span class="p">[[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="mi">12</span><span class="p">,</span> <span class="mi">12</span><span class="p">],</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mf">12.3</span><span class="p">,</span> <span class="mf">33.</span><span class="p">],</span> <span class="p">[</span><span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="mf">12.3</span><span class="p">,</span> <span class="mi">123</span><span class="p">],</span> <span class="p">[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="s2">&quot;c&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="s2">&quot;a&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="go"> b c</span>
<span class="go">a</span>
<span class="go">a 13.0 13.0</span>
<span class="go">b 12.3 123.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="n">dropna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="go"> b c</span>
<span class="go">a</span>
<span class="go">a 13.0 13.0</span>
<span class="go">b 12.3 123.0</span>
<span class="go">NaN 12.3 33.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.gt">
<code class="descname">gt</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.gt" title="Permalink to this definition"></a></dt>
<dd><p>Return whether series is greater than other, element-wise (binary operator <cite>gt</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">&gt;</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">e 1.0</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;f&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 0.0</span>
<span class="go">b 1.0</span>
<span class="go">c 2.0</span>
<span class="go">d NaN</span>
<span class="go">f 1.0</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">gt</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a True</span>
<span class="go">b False</span>
<span class="go">c False</span>
<span class="go">d False</span>
<span class="go">e True</span>
<span class="go">f False</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.hist">
<code class="descname">hist</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.hist" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.hist.html#pandas.Series.hist" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.hist()</span></code></a> is not yet supported in the Beam DataFrame API because it is a plotting tool.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-plotting-tools">https://s.apache.org/dataframe-plotting-tools</a>.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.iloc">
<code class="descname">iloc</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.iloc" title="Permalink to this definition"></a></dt>
<dd><p>Purely integer-location based indexing for selection by position.</p>
<p><code class="docutils literal notranslate"><span class="pre">.iloc[]</span></code> is primarily integer position based (from <code class="docutils literal notranslate"><span class="pre">0</span></code> to
<code class="docutils literal notranslate"><span class="pre">length-1</span></code> of the axis), but may also be used with a boolean
array.</p>
<p>Allowed inputs are:</p>
<ul class="simple">
<li>An integer, e.g. <code class="docutils literal notranslate"><span class="pre">5</span></code>.</li>
<li>A list or array of integers, e.g. <code class="docutils literal notranslate"><span class="pre">[4,</span> <span class="pre">3,</span> <span class="pre">0]</span></code>.</li>
<li>A slice object with ints, e.g. <code class="docutils literal notranslate"><span class="pre">1:7</span></code>.</li>
<li>A boolean array.</li>
<li>A <code class="docutils literal notranslate"><span class="pre">callable</span></code> function with one argument (the calling Series or
DataFrame) and that returns valid output for indexing (one of the above).
This is useful in method chains, when you don’t have a reference to the
calling object, but would like to base your selection on some value.</li>
</ul>
<p><code class="docutils literal notranslate"><span class="pre">.iloc</span></code> will raise <code class="docutils literal notranslate"><span class="pre">IndexError</span></code> if a requested indexer is
out-of-bounds, except <em>slice</em> indexers which allow out-of-bounds
indexing (this conforms with python/numpy <em>slice</em> semantics).</p>
<p>See more at <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/indexing.html#indexing-integer" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">Selection by Position</span></a>.</p>
<p class="rubric">Differences from pandas</p>
<p>Position-based indexing with <cite>iloc</cite> is order-sensitive in almost every
case. Beam DataFrame users should prefer label-based indexing with <cite>loc</cite>.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.iat" title="apache_beam.dataframe.frames.DeferredDataFrame.iat"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.iat</span></code></a></dt>
<dd>Fast integer location scalar accessor.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.loc" title="apache_beam.dataframe.frames.DeferredDataFrame.loc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.loc</span></code></a></dt>
<dd>Purely label-location based indexer for selection by label.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.iloc" title="apache_beam.dataframe.frames.DeferredSeries.iloc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredSeries.iloc</span></code></a></dt>
<dd>Purely integer-location based indexing for selection by position.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">mydict</span> <span class="o">=</span> <span class="p">[{</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">:</span> <span class="mi">4</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="mi">100</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="mi">200</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="mi">300</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">:</span> <span class="mi">400</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="mi">1000</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="mi">2000</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="mi">3000</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">:</span> <span class="mi">4000</span> <span class="p">}]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">mydict</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="go">1 100 200 300 400</span>
<span class="go">2 1000 2000 3000 4000</span>
<span class="go">**Indexing just the rows**</span>
<span class="go">With a scalar integer.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="nb">type</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="go">&lt;class &#39;pandas.core.series.Series&#39;&gt;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="go">a 1</span>
<span class="go">b 2</span>
<span class="go">c 3</span>
<span class="go">d 4</span>
<span class="go">Name: 0, dtype: int64</span>
<span class="go">With a list of integers.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[[</span><span class="mi">0</span><span class="p">]]</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="nb">type</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[[</span><span class="mi">0</span><span class="p">]])</span>
<span class="go">&lt;class &#39;pandas.core.frame.DataFrame&#39;&gt;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">]]</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="go">1 100 200 300 400</span>
<span class="go">With a `slice` object.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:</span><span class="mi">3</span><span class="p">]</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="go">1 100 200 300 400</span>
<span class="go">2 1000 2000 3000 4000</span>
<span class="go">With a boolean mask the same length as the index.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">]]</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="go">2 1000 2000 3000 4000</span>
<span class="go">With a callable, useful in method chains. The `x` passed</span>
<span class="go">to the ``lambda`` is the DataFrame being sliced. This selects</span>
<span class="go">the rows whose index label is even.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">index</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="p">]</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="go">2 1000 2000 3000 4000</span>
<span class="go">**Indexing both axes**</span>
<span class="go">You can mix the indexer types for the index and columns. Use ``:`` to</span>
<span class="go">select the entire axis.</span>
<span class="go">With scalar integers.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span>
<span class="go">2</span>
<span class="go">With lists of integers.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">]]</span>
<span class="go"> b d</span>
<span class="go">0 2 4</span>
<span class="go">2 2000 4000</span>
<span class="go">With `slice` objects.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="mi">3</span><span class="p">,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">3</span><span class="p">]</span>
<span class="go"> a b c</span>
<span class="go">1 100 200 300</span>
<span class="go">2 1000 2000 3000</span>
<span class="go">With a boolean array whose length matches the columns.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">]]</span>
<span class="go"> a c</span>
<span class="go">0 1 3</span>
<span class="go">1 100 300</span>
<span class="go">2 1000 3000</span>
<span class="go">With a callable function that expects the Series or DataFrame.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="k">lambda</span> <span class="n">df</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">]]</span>
<span class="go"> a c</span>
<span class="go">0 1 3</span>
<span class="go">1 100 300</span>
<span class="go">2 1000 3000</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.index">
<code class="descname">index</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.index" title="Permalink to this definition"></a></dt>
<dd><p>The index (row labels) of the Series.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.infer_object">
<code class="descname">infer_object</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.infer_object" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.infer_objects.html#pandas.Series.infer_objects" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.infer_objects()</span></code></a> is not yet supported in the Beam DataFrame API because the columns in the output DataFrame depend on the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-columns">https://s.apache.org/dataframe-non-deferred-columns</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.infer_objects">
<code class="descname">infer_objects</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.infer_objects" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.infer_objects.html#pandas.Series.infer_objects" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.infer_objects()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘infer_objects’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.isin">
<code class="descname">isin</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.isin" title="Permalink to this definition"></a></dt>
<dd><p>Whether each element in the DataFrame is contained in values.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>values</strong> (<em>iterable</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a>) – The result will only be true at a location if all the
labels match. If <cite>values</cite> is a DeferredSeries, that’s the index. If
<cite>values</cite> is a dict, the keys must be the column names,
which must match. If <cite>values</cite> is a DeferredDataFrame,
then both the index and column labels must match.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">DeferredDataFrame of booleans showing whether each element in the DeferredDataFrame
is contained in values.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eq" title="apache_beam.dataframe.frames.DeferredDataFrame.eq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.eq()</span></code></a></dt>
<dd>Equality test for DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.isin" title="apache_beam.dataframe.frames.DeferredSeries.isin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.isin()</span></code></a></dt>
<dd>Equivalent method on DeferredSeries.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.str.contains()</span></code></dt>
<dd>Test if pattern or regex is contained within a string of a DeferredSeries or Index.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;num_legs&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="s1">&#39;num_wings&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> num_legs num_wings</span>
<span class="go">falcon 2 2</span>
<span class="go">dog 4 0</span>
<span class="go">When ``values`` is a list, check whether every value in the DataFrame</span>
<span class="go">is present in the list (which animals have 0 or 2 legs or wings):</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">isin</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
<span class="go"> num_legs num_wings</span>
<span class="go">falcon True True</span>
<span class="go">dog False True</span>
<span class="go">When ``values`` is a dict, we can pass values to check for each</span>
<span class="go">column separately:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">isin</span><span class="p">({</span><span class="s1">&#39;num_wings&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">]})</span>
<span class="go"> num_legs num_wings</span>
<span class="go">falcon False False</span>
<span class="go">dog False True</span>
<span class="go">When ``values`` is a Series or DataFrame the index and column must</span>
<span class="go">match. Note that &#39;falcon&#39; matches in both columns of ``other``, while</span>
<span class="go">&#39;dog&#39; is missing from its index and so never matches.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;num_legs&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">8</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;num_wings&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;spider&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">isin</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<span class="go"> num_legs num_wings</span>
<span class="go">falcon True True</span>
<span class="go">dog False False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.item">
<code class="descname">item</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.item" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.item.html#pandas.Series.item" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.item()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘item’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.last">
<code class="descname">last</code><span class="sig-paren">(</span><em>offset</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.last" title="Permalink to this definition"></a></dt>
<dd><p>Select final periods of time series data based on a date offset.</p>
<p>For a DataFrame with a sorted DatetimeIndex, this function
selects the last few rows based on a date offset.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>offset</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>DateOffset</em><em>, </em><em>dateutil.relativedelta</em>) – The offset length of the data that will be selected. For instance,
‘3D’ will display all the rows having their index within the last 3 days.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A subset of the caller.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> – If the index is not a <code class="xref py py-class docutils literal notranslate"><span class="pre">DatetimeIndex</span></code></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.first" title="apache_beam.dataframe.frames.DeferredSeries.first"><code class="xref py py-meth docutils literal notranslate"><span class="pre">first()</span></code></a></dt>
<dd>Select initial periods of time series based on a date offset.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.at_time" title="apache_beam.dataframe.frames.DeferredSeries.at_time"><code class="xref py py-meth docutils literal notranslate"><span class="pre">at_time()</span></code></a></dt>
<dd>Select values at a particular time of the day.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.between_time" title="apache_beam.dataframe.frames.DeferredSeries.between_time"><code class="xref py py-meth docutils literal notranslate"><span class="pre">between_time()</span></code></a></dt>
<dd>Select values between particular times of the day.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">i</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s1">&#39;2018-04-09&#39;</span><span class="p">,</span> <span class="n">periods</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;2D&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span> <span class="n">index</span><span class="o">=</span><span class="n">i</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span>
<span class="go"> A</span>
<span class="go">2018-04-09 1</span>
<span class="go">2018-04-11 2</span>
<span class="go">2018-04-13 3</span>
<span class="go">2018-04-15 4</span>
<span class="go">Get the rows for the last 3 days:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span><span class="o">.</span><span class="n">last</span><span class="p">(</span><span class="s1">&#39;3D&#39;</span><span class="p">)</span>
<span class="go"> A</span>
<span class="go">2018-04-13 3</span>
<span class="go">2018-04-15 4</span>
<span class="go">Notice the data for the last 3 calendar days were returned, not the last</span>
<span class="go">3 observed days in the dataset, and therefore data for 2018-04-11 was</span>
<span class="go">not returned.</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.le">
<code class="descname">le</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.le" title="Permalink to this definition"></a></dt>
<dd><p>Return whether series is less than or equal to other, element-wise (binary operator <cite>le</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">&lt;=</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">e 1.0</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;f&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 0.0</span>
<span class="go">b 1.0</span>
<span class="go">c 2.0</span>
<span class="go">d NaN</span>
<span class="go">f 1.0</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">le</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a False</span>
<span class="go">b True</span>
<span class="go">c True</span>
<span class="go">d False</span>
<span class="go">e False</span>
<span class="go">f True</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.length">
<code class="descname">length</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.length" title="Permalink to this definition"></a></dt>
<dd><p>Alternative to <code class="docutils literal notranslate"><span class="pre">len(df)</span></code> which returns a deferred result that can be
used in arithmetic with <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredSeries</span></code></a> or
<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code></a> instances.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.loc">
<code class="descname">loc</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.loc" title="Permalink to this definition"></a></dt>
<dd><p>Access a group of rows and columns by label(s) or a boolean array.</p>
<p><code class="docutils literal notranslate"><span class="pre">.loc[]</span></code> is primarily label based, but may also be used with a
boolean array.</p>
<p>Allowed inputs are:</p>
<ul>
<li><p class="first">A single label, e.g. <code class="docutils literal notranslate"><span class="pre">5</span></code> or <code class="docutils literal notranslate"><span class="pre">'a'</span></code>, (note that <code class="docutils literal notranslate"><span class="pre">5</span></code> is
interpreted as a <em>label</em> of the index, and <strong>never</strong> as an
integer position along the index).</p>
</li>
<li><p class="first">A list or array of labels, e.g. <code class="docutils literal notranslate"><span class="pre">['a',</span> <span class="pre">'b',</span> <span class="pre">'c']</span></code>.</p>
</li>
<li><p class="first">A slice object with labels, e.g. <code class="docutils literal notranslate"><span class="pre">'a':'f'</span></code>.</p>
<div class="admonition warning">
<p class="first admonition-title">Warning</p>
<p class="last">Note that contrary to usual Python slices, <strong>both</strong> the
start and the stop are included.</p>
</div>
</li>
<li><p class="first">A boolean array of the same length as the axis being sliced,
e.g. <code class="docutils literal notranslate"><span class="pre">[True,</span> <span class="pre">False,</span> <span class="pre">True]</span></code>.</p>
</li>
<li><p class="first">An alignable boolean Series. The index of the key will be aligned before
masking.</p>
</li>
<li><p class="first">An alignable Index. The Index of the returned selection will be the input.</p>
</li>
<li><p class="first">A <code class="docutils literal notranslate"><span class="pre">callable</span></code> function with one argument (the calling Series or
DataFrame) and that returns valid output for indexing (one of the above)</p>
</li>
</ul>
<p>See more at <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/indexing.html#indexing-label" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">Selection by Label</span></a>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Raises:</th><td class="field-body"><ul class="first last simple">
<li><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#KeyError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">KeyError</span></code></a> – If any items are not found.</li>
<li><code class="xref py py-exc docutils literal notranslate"><span class="pre">IndexingError</span></code> – If an indexed key is passed and its index is unalignable to the frame index.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.at" title="apache_beam.dataframe.frames.DeferredDataFrame.at"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.at</span></code></a></dt>
<dd>Access a single value for a row/column label pair.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.iloc" title="apache_beam.dataframe.frames.DeferredDataFrame.iloc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.iloc</span></code></a></dt>
<dd>Access group of rows and columns by integer position(s).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.xs" title="apache_beam.dataframe.frames.DeferredDataFrame.xs"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.xs</span></code></a></dt>
<dd>Returns a cross-section (row(s) or column(s)) from the DeferredSeries/DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.loc" title="apache_beam.dataframe.frames.DeferredSeries.loc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredSeries.loc</span></code></a></dt>
<dd>Access group of values using labels.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">**Getting values**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">],</span> <span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;sidewinder&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;max_speed&#39;</span><span class="p">,</span> <span class="s1">&#39;shield&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra 1 2</span>
<span class="go">viper 4 5</span>
<span class="go">sidewinder 7 8</span>
<span class="go">Single label. Note this returns the row as a Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;viper&#39;</span><span class="p">]</span>
<span class="go">max_speed 4</span>
<span class="go">shield 5</span>
<span class="go">Name: viper, dtype: int64</span>
<span class="go">List of labels. Note using ``[[]]`` returns a DataFrame.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[[</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;sidewinder&#39;</span><span class="p">]]</span>
<span class="go"> max_speed shield</span>
<span class="go">viper 4 5</span>
<span class="go">sidewinder 7 8</span>
<span class="go">Single label for row and column</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;shield&#39;</span><span class="p">]</span>
<span class="go">2</span>
<span class="go">Slice with labels for row and single label for column. As mentioned</span>
<span class="go">above, note that both the start and stop of the slice are included.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;cobra&#39;</span><span class="p">:</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;max_speed&#39;</span><span class="p">]</span>
<span class="go">cobra 1</span>
<span class="go">viper 4</span>
<span class="go">Name: max_speed, dtype: int64</span>
<span class="go">Boolean list with the same length as the row axis</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[[</span><span class="kc">False</span><span class="p">,</span> <span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">]]</span>
<span class="go"> max_speed shield</span>
<span class="go">sidewinder 7 8</span>
<span class="go">Alignable boolean Series:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;sidewinder&#39;</span><span class="p">,</span> <span class="s1">&#39;cobra&#39;</span><span class="p">])]</span>
<span class="go"> max_speed shield</span>
<span class="go">sidewinder 7 8</span>
<span class="go">Index (same behavior as ``df.reindex``)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">Index</span><span class="p">([</span><span class="s2">&quot;cobra&quot;</span><span class="p">,</span> <span class="s2">&quot;viper&quot;</span><span class="p">],</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;foo&quot;</span><span class="p">)]</span>
<span class="go"> max_speed shield</span>
<span class="go">foo</span>
<span class="go">cobra 1 2</span>
<span class="go">viper 4 5</span>
<span class="go">Conditional that returns a boolean Series</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;shield&#39;</span><span class="p">]</span> <span class="o">&gt;</span> <span class="mi">6</span><span class="p">]</span>
<span class="go"> max_speed shield</span>
<span class="go">sidewinder 7 8</span>
<span class="go">Conditional that returns a boolean Series with column labels specified</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;shield&#39;</span><span class="p">]</span> <span class="o">&gt;</span> <span class="mi">6</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;max_speed&#39;</span><span class="p">]]</span>
<span class="go"> max_speed</span>
<span class="go">sidewinder 7</span>
<span class="go">Callable that returns a boolean Series</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="k">lambda</span> <span class="n">df</span><span class="p">:</span> <span class="n">df</span><span class="p">[</span><span class="s1">&#39;shield&#39;</span><span class="p">]</span> <span class="o">==</span> <span class="mi">8</span><span class="p">]</span>
<span class="go"> max_speed shield</span>
<span class="go">sidewinder 7 8</span>
<span class="go">**Setting values**</span>
<span class="go">Set value for all items matching the list of labels</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[[</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;sidewinder&#39;</span><span class="p">],</span> <span class="p">[</span><span class="s1">&#39;shield&#39;</span><span class="p">]]</span> <span class="o">=</span> <span class="mi">50</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra 1 2</span>
<span class="go">viper 4 50</span>
<span class="go">sidewinder 7 50</span>
<span class="go">Set value for an entire row</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;cobra&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="mi">10</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra 10 10</span>
<span class="go">viper 4 50</span>
<span class="go">sidewinder 7 50</span>
<span class="go">Set value for an entire column</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="s1">&#39;max_speed&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="mi">30</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra 30 10</span>
<span class="go">viper 30 50</span>
<span class="go">sidewinder 30 50</span>
<span class="go">Set value for rows matching boolean condition</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;shield&#39;</span><span class="p">]</span> <span class="o">&gt;</span> <span class="mi">35</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra 30 10</span>
<span class="go">viper 0 0</span>
<span class="go">sidewinder 0 0</span>
<span class="go">**Getting values on a DataFrame with an index that has integer labels**</span>
<span class="go">Another example using integers for the index</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">],</span> <span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">],</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;max_speed&#39;</span><span class="p">,</span> <span class="s1">&#39;shield&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">7 1 2</span>
<span class="go">8 4 5</span>
<span class="go">9 7 8</span>
<span class="go">Slice with integer labels for rows. As mentioned above, note that both</span>
<span class="go">the start and stop of the slice are included.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="mi">7</span><span class="p">:</span><span class="mi">9</span><span class="p">]</span>
<span class="go"> max_speed shield</span>
<span class="go">7 1 2</span>
<span class="go">8 4 5</span>
<span class="go">9 7 8</span>
<span class="go">**Getting values with a MultiIndex**</span>
<span class="go">A number of examples using a DataFrame with a MultiIndex</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">tuples</span> <span class="o">=</span> <span class="p">[</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark i&#39;</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark ii&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;sidewinder&#39;</span><span class="p">,</span> <span class="s1">&#39;mark i&#39;</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;sidewinder&#39;</span><span class="p">,</span> <span class="s1">&#39;mark ii&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;mark ii&#39;</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;mark iii&#39;</span><span class="p">)</span>
<span class="gp">... </span><span class="p">]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">index</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_tuples</span><span class="p">(</span><span class="n">tuples</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">values</span> <span class="o">=</span> <span class="p">[[</span><span class="mi">12</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="p">[</span><span class="mi">10</span><span class="p">,</span> <span class="mi">20</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">16</span><span class="p">,</span> <span class="mi">36</span><span class="p">]]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">values</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;max_speed&#39;</span><span class="p">,</span> <span class="s1">&#39;shield&#39;</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra mark i 12 2</span>
<span class="go"> mark ii 0 4</span>
<span class="go">sidewinder mark i 10 20</span>
<span class="go"> mark ii 1 4</span>
<span class="go">viper mark ii 7 1</span>
<span class="go"> mark iii 16 36</span>
<span class="go">Single label. Note this returns a DataFrame with a single index.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;cobra&#39;</span><span class="p">]</span>
<span class="go"> max_speed shield</span>
<span class="go">mark i 12 2</span>
<span class="go">mark ii 0 4</span>
<span class="go">Single index tuple. Note this returns a Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark ii&#39;</span><span class="p">)]</span>
<span class="go">max_speed 0</span>
<span class="go">shield 4</span>
<span class="go">Name: (cobra, mark ii), dtype: int64</span>
<span class="go">Single label for row and column. Similar to passing in a tuple, this</span>
<span class="go">returns a Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark i&#39;</span><span class="p">]</span>
<span class="go">max_speed 12</span>
<span class="go">shield 2</span>
<span class="go">Name: (cobra, mark i), dtype: int64</span>
<span class="go">Single tuple. Note using ``[[]]`` returns a DataFrame.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[[(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark ii&#39;</span><span class="p">)]]</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra mark ii 0 4</span>
<span class="go">Single tuple for the index with a single label for the column</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark i&#39;</span><span class="p">),</span> <span class="s1">&#39;shield&#39;</span><span class="p">]</span>
<span class="go">2</span>
<span class="go">Slice from index tuple to single label</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark i&#39;</span><span class="p">):</span><span class="s1">&#39;viper&#39;</span><span class="p">]</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra mark i 12 2</span>
<span class="go"> mark ii 0 4</span>
<span class="go">sidewinder mark i 10 20</span>
<span class="go"> mark ii 1 4</span>
<span class="go">viper mark ii 7 1</span>
<span class="go"> mark iii 16 36</span>
<span class="go">Slice from index tuple to index tuple</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark i&#39;</span><span class="p">):(</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;mark ii&#39;</span><span class="p">)]</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra mark i 12 2</span>
<span class="go"> mark ii 0 4</span>
<span class="go">sidewinder mark i 10 20</span>
<span class="go"> mark ii 1 4</span>
<span class="go">viper mark ii 7 1</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.lt">
<code class="descname">lt</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.lt" title="Permalink to this definition"></a></dt>
<dd><p>Return whether series is less than other, element-wise (binary operator <cite>lt</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">&lt;</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">e 1.0</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;f&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 0.0</span>
<span class="go">b 1.0</span>
<span class="go">c 2.0</span>
<span class="go">d NaN</span>
<span class="go">f 1.0</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">lt</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a False</span>
<span class="go">b False</span>
<span class="go">c True</span>
<span class="go">d False</span>
<span class="go">e False</span>
<span class="go">f True</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.mask">
<code class="descname">mask</code><span class="sig-paren">(</span><em>cond</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.mask" title="Permalink to this definition"></a></dt>
<dd><p>mask is not parallelizable when <code class="docutils literal notranslate"><span class="pre">errors=&quot;ignore&quot;</span></code> is specified.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.mod">
<code class="descname">mod</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.mod" title="Permalink to this definition"></a></dt>
<dd><p>Return the modulo of series and other, element-wise (binary operator <cite>mod</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">%</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rmod" title="apache_beam.dataframe.frames.DeferredSeries.rmod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rmod()</span></code></a></dt>
<dd>Reverse of the modulo operator; see the <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">mod</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 0.0</span>
<span class="go">b NaN</span>
<span class="go">c NaN</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.mul">
<code class="descname">mul</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.mul" title="Permalink to this definition"></a></dt>
<dd><p>Return the multiplication of series and other, element-wise (binary operator <cite>mul</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">*</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rmul" title="apache_beam.dataframe.frames.DeferredSeries.rmul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rmul()</span></code></a></dt>
<dd>Reverse of the multiplication operator; see the <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">multiply</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 1.0</span>
<span class="go">b 0.0</span>
<span class="go">c 0.0</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.multiply">
<code class="descname">multiply</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.multiply" title="Permalink to this definition"></a></dt>
<dd><p>Return the multiplication of series and other, element-wise (binary operator <cite>mul</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">*</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rmul" title="apache_beam.dataframe.frames.DeferredSeries.rmul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rmul()</span></code></a></dt>
<dd>Reverse of the multiplication operator; see the <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">multiply</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 1.0</span>
<span class="go">b 0.0</span>
<span class="go">c 0.0</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.ndim">
<code class="descname">ndim</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.ndim" title="Permalink to this definition"></a></dt>
<dd><p>Return an int representing the number of axes / array dimensions.</p>
<p>Return 1 if Series. Otherwise return 2 if DataFrame.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">ndarray.ndim</span></code></dt>
<dd>Number of array dimensions.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">({</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="mi">3</span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">ndim</span>
<span class="go">1</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">ndim</span>
<span class="go">2</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.ne">
<code class="descname">ne</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.ne" title="Permalink to this definition"></a></dt>
<dd><p>Return whether series and other are not equal, element-wise (binary operator <cite>ne</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">!=</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">ne</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a False</span>
<span class="go">b True</span>
<span class="go">c True</span>
<span class="go">d True</span>
<span class="go">e True</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.pad">
<code class="descname">pad</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.pad" title="Permalink to this definition"></a></dt>
<dd><p>Synonym for <code class="xref py py-meth docutils literal notranslate"><span class="pre">DataFrame.fillna()</span></code> with <code class="docutils literal notranslate"><span class="pre">method='ffill'</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Object with missing values filled or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">DeferredSeries/DeferredDataFrame or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>pad is only supported for axis="columns". axis="index" is not supported because it is order-sensitive.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.pipe">
<code class="descname">pipe</code><span class="sig-paren">(</span><em>func</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.pipe" title="Permalink to this definition"></a></dt>
<dd><p>Apply func(self, *args, **kwargs).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>func</strong> (<em>function</em>) – Function to apply to the DeferredSeries/DeferredDataFrame.
<code class="docutils literal notranslate"><span class="pre">args</span></code>, and <code class="docutils literal notranslate"><span class="pre">kwargs</span></code> are passed into <code class="docutils literal notranslate"><span class="pre">func</span></code>.
Alternatively a <code class="docutils literal notranslate"><span class="pre">(callable,</span> <span class="pre">data_keyword)</span></code> tuple where
<code class="docutils literal notranslate"><span class="pre">data_keyword</span></code> is a string indicating the keyword of
<code class="docutils literal notranslate"><span class="pre">callable</span></code> that expects the DeferredSeries/DeferredDataFrame.</li>
<li><strong>args</strong> (<em>iterable</em><em>, </em><em>optional</em>) – Positional arguments passed into <code class="docutils literal notranslate"><span class="pre">func</span></code>.</li>
<li><strong>kwargs</strong> (<em>mapping</em><em>, </em><em>optional</em>) – A dictionary of keyword arguments passed into <code class="docutils literal notranslate"><span class="pre">func</span></code>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><strong>object</strong></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">the return type of <code class="docutils literal notranslate"><span class="pre">func</span></code>.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.apply" title="apache_beam.dataframe.frames.DeferredDataFrame.apply"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.apply()</span></code></a></dt>
<dd>Apply a function along input axis of DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.applymap" title="apache_beam.dataframe.frames.DeferredDataFrame.applymap"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.applymap()</span></code></a></dt>
<dd>Apply a function elementwise on a whole DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.map" title="apache_beam.dataframe.frames.DeferredSeries.map"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.map()</span></code></a></dt>
<dd>Apply a mapping correspondence on a <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredSeries</span></code>.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Use <code class="docutils literal notranslate"><span class="pre">.pipe</span></code> when chaining together functions that expect
DeferredSeries, DeferredDataFrames or GroupBy objects. Instead of writing</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">func</span><span class="p">(</span><span class="n">g</span><span class="p">(</span><span class="n">h</span><span class="p">(</span><span class="n">df</span><span class="p">),</span> <span class="n">arg1</span><span class="o">=</span><span class="n">a</span><span class="p">),</span> <span class="n">arg2</span><span class="o">=</span><span class="n">b</span><span class="p">,</span> <span class="n">arg3</span><span class="o">=</span><span class="n">c</span><span class="p">)</span> <span class="c1"># doctest: +SKIP</span>
</pre></div>
</div>
<p>You can write</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">pipe</span><span class="p">(</span><span class="n">h</span><span class="p">)</span>
<span class="gp">... </span> <span class="o">.</span><span class="n">pipe</span><span class="p">(</span><span class="n">g</span><span class="p">,</span> <span class="n">arg1</span><span class="o">=</span><span class="n">a</span><span class="p">)</span>
<span class="gp">... </span> <span class="o">.</span><span class="n">pipe</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">arg2</span><span class="o">=</span><span class="n">b</span><span class="p">,</span> <span class="n">arg3</span><span class="o">=</span><span class="n">c</span><span class="p">)</span>
<span class="gp">... </span><span class="p">)</span> <span class="c1"># doctest: +SKIP</span>
</pre></div>
</div>
<p>If you have a function that takes the data as (say) the second
argument, pass a tuple indicating which keyword expects the
data. For example, suppose <code class="docutils literal notranslate"><span class="pre">f</span></code> takes its data as <code class="docutils literal notranslate"><span class="pre">arg2</span></code>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">pipe</span><span class="p">(</span><span class="n">h</span><span class="p">)</span>
<span class="gp">... </span> <span class="o">.</span><span class="n">pipe</span><span class="p">(</span><span class="n">g</span><span class="p">,</span> <span class="n">arg1</span><span class="o">=</span><span class="n">a</span><span class="p">)</span>
<span class="gp">... </span> <span class="o">.</span><span class="n">pipe</span><span class="p">((</span><span class="n">func</span><span class="p">,</span> <span class="s1">&#39;arg2&#39;</span><span class="p">),</span> <span class="n">arg1</span><span class="o">=</span><span class="n">a</span><span class="p">,</span> <span class="n">arg3</span><span class="o">=</span><span class="n">c</span><span class="p">)</span>
<span class="gp">... </span> <span class="p">)</span> <span class="c1"># doctest: +SKIP</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.pow">
<code class="descname">pow</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.pow" title="Permalink to this definition"></a></dt>
<dd><p>Return Exponential power of series and other, element-wise (binary operator <cite>pow</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">**</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rpow" title="apache_beam.dataframe.frames.DeferredSeries.rpow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rpow()</span></code></a></dt>
<dd>Reverse of the Exponential power operator, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">pow</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.radd">
<code class="descname">radd</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.radd" title="Permalink to this definition"></a></dt>
<dd><p>Return Addition of series and other, element-wise (binary operator <cite>radd</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">+</span> <span class="pre">series</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.add" title="apache_beam.dataframe.frames.DeferredSeries.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.add()</span></code></a></dt>
<dd>Element-wise Addition, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 2.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.rank">
<code class="descname">rank</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.rank" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.rank.html#pandas.Series.rank" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.rank()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘rank’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.rdiv">
<code class="descname">rdiv</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.rdiv" title="Permalink to this definition"></a></dt>
<dd><p>Return Floating division of series and other, element-wise (binary operator <cite>rtruediv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">/</span> <span class="pre">series</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.truediv" title="apache_beam.dataframe.frames.DeferredSeries.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.truediv()</span></code></a></dt>
<dd>Element-wise Floating division, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">divide</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 1.0</span>
<span class="go">b inf</span>
<span class="go">c inf</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.rdivmod">
<code class="descname">rdivmod</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.rdivmod" title="Permalink to this definition"></a></dt>
<dd><p>Return Integer division and modulo of series and other, element-wise (binary operator <cite>rdivmod</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">divmod</span> <span class="pre">series</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">2-Tuple of DeferredSeries</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.divmod" title="apache_beam.dataframe.frames.DeferredSeries.divmod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.divmod()</span></code></a></dt>
<dd>Element-wise Integer division and modulo, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">divmod</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">(a 1.0</span>
<span class="go"> b NaN</span>
<span class="go"> c NaN</span>
<span class="go"> d 0.0</span>
<span class="go"> e NaN</span>
<span class="go"> dtype: float64,</span>
<span class="go"> a 0.0</span>
<span class="go"> b NaN</span>
<span class="go"> c NaN</span>
<span class="go"> d 0.0</span>
<span class="go"> e NaN</span>
<span class="go"> dtype: float64)</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.reindex">
<code class="descname">reindex</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.reindex" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.reindex.html#pandas.Series.reindex" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.reindex()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.reindex_like">
<code class="descname">reindex_like</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.reindex_like" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.reindex_like.html#pandas.Series.reindex_like" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.reindex_like()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘reindex_like’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.reorder_levels">
<code class="descname">reorder_levels</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.reorder_levels" title="Permalink to this definition"></a></dt>
<dd><p>Rearrange index levels using input order. May not drop or duplicate levels.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>order</strong> (<em>list of int</em><em> or </em><em>list of str</em>) – List representing new level order. Reference level by number
(position) or by key (label).</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – Where to reorder levels.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.replace">
<code class="descname">replace</code><span class="sig-paren">(</span><em>to_replace</em>, <em>value</em>, <em>limit</em>, <em>method</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.replace" title="Permalink to this definition"></a></dt>
<dd><p>Replace values given in <cite>to_replace</cite> with <cite>value</cite>.</p>
<p>Values of the DataFrame are replaced with other values dynamically.</p>
<p>This differs from updating with <code class="docutils literal notranslate"><span class="pre">.loc</span></code> or <code class="docutils literal notranslate"><span class="pre">.iloc</span></code>, which require
you to specify a location to update with some value.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>to_replace</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>regex</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a>) – <p>How to find the values that will be replaced.</p>
<ul>
<li>numeric, str or regex:<blockquote>
<div><ul>
<li><dl class="first docutils">
<dt>numeric: numeric values equal to <cite>to_replace</cite> will be</dt>
<dd>replaced with <cite>value</cite></dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>str: string exactly matching <cite>to_replace</cite> will be replaced</dt>
<dd>with <cite>value</cite></dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>regex: regexes matching <cite>to_replace</cite> will be replaced with</dt>
<dd><cite>value</cite></dd>
</dl>
</li>
</ul>
</div></blockquote>
</li>
<li>list of str, regex, or numeric:<blockquote>
<div><ul>
<li><dl class="first docutils">
<dt>First, if <cite>to_replace</cite> and <cite>value</cite> are both lists, they</dt>
<dd><strong>must</strong> be the same length.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>Second, if <code class="docutils literal notranslate"><span class="pre">regex=True</span></code> then all of the strings in <strong>both</strong></dt>
<dd>lists will be interpreted as regexes; otherwise they will match
directly. This doesn’t matter much for <cite>value</cite> since there
are only a few possible substitution regexes you can use.</dd>
</dl>
</li>
<li>str, regex and numeric rules apply as above.</li>
</ul>
</div></blockquote>
</li>
<li>dict:<blockquote>
<div><ul>
<li><dl class="first docutils">
<dt>Dicts can be used to specify different replacement values</dt>
<dd>for different existing values. For example,
<code class="docutils literal notranslate"><span class="pre">{'a':</span> <span class="pre">'b',</span> <span class="pre">'y':</span> <span class="pre">'z'}</span></code> replaces the value ‘a’ with ‘b’ and
‘y’ with ‘z’. To use a dict in this way the <cite>value</cite>
parameter should be <cite>None</cite>.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>For a DeferredDataFrame a dict can specify that different values</dt>
<dd>should be replaced in different columns. For example,
<code class="docutils literal notranslate"><span class="pre">{'a':</span> <span class="pre">1,</span> <span class="pre">'b':</span> <span class="pre">'z'}</span></code> looks for the value 1 in column ‘a’
and the value ‘z’ in column ‘b’ and replaces these values
with whatever is specified in <cite>value</cite>. The <cite>value</cite> parameter
should not be <code class="docutils literal notranslate"><span class="pre">None</span></code> in this case. You can treat this as a
special case of passing two lists except that you are
specifying the column to search in.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>For a DeferredDataFrame nested dictionaries, e.g.,</dt>
<dd><code class="docutils literal notranslate"><span class="pre">{'a':</span> <span class="pre">{'b':</span> <span class="pre">np.nan}}</span></code>, are read as follows: look in column
‘a’ for the value ‘b’ and replace it with NaN. The <cite>value</cite>
parameter should be <code class="docutils literal notranslate"><span class="pre">None</span></code> to use a nested dict in this
way. You can nest regular expressions as well. Note that
column names (the top-level dictionary keys in a nested
dictionary) <strong>cannot</strong> be regular expressions.</dd>
</dl>
</li>
</ul>
</div></blockquote>
</li>
<li>None:<blockquote>
<div><ul>
<li><dl class="first docutils">
<dt>This means that the <cite>regex</cite> argument must be a string,</dt>
<dd>compiled regular expression, or list, dict, ndarray or
DeferredSeries of such elements. If <cite>value</cite> is also <code class="docutils literal notranslate"><span class="pre">None</span></code> then
this <strong>must</strong> be a nested dictionary or DeferredSeries.</dd>
</dl>
</li>
</ul>
</div></blockquote>
</li>
</ul>
<p>See the examples section for examples of each of these.</p>
</li>
<li><strong>value</strong> (<em>scalar</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>regex</em><em>, </em><em>default None</em>) – Value to replace any values matching <cite>to_replace</cite> with.
For a DeferredDataFrame a dict of values can be used to specify which
value to use for each column (columns not in the dict will not be
filled). Regular expressions, strings and lists or dicts of such
objects are also allowed.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, performs operation inplace and returns None.</li>
<li><strong>limit</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default None</em>) – Maximum size gap to forward or backward fill.</li>
<li><strong>regex</strong> (bool or same types as <cite>to_replace</cite>, default False) – Whether to interpret <cite>to_replace</cite> and/or <cite>value</cite> as regular
expressions. If this is <code class="docutils literal notranslate"><span class="pre">True</span></code> then <cite>to_replace</cite> <em>must</em> be a
string. Alternatively, this could be a regular expression or a
list, dict, or array of regular expressions in which case
<cite>to_replace</cite> must be <code class="docutils literal notranslate"><span class="pre">None</span></code>.</li>
<li><strong>method</strong> ({‘pad’, ‘ffill’, ‘bfill’, <cite>None</cite>}) – <p>The method to use for replacement, when <cite>to_replace</cite> is a
scalar, list or tuple and <cite>value</cite> is <code class="docutils literal notranslate"><span class="pre">None</span></code>.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 0.23.0: </span>Added to DeferredDataFrame.</p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Object after replacement.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><ul class="first last">
<li><p class="first"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#AssertionError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">AssertionError</span></code></a> – If <cite>regex</cite> is not a <code class="docutils literal notranslate"><span class="pre">bool</span></code> and <cite>to_replace</cite> is not <code class="docutils literal notranslate"><span class="pre">None</span></code>.</p>
</li>
<li><p class="first"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> –</p>
<ul class="simple">
<li>If <cite>to_replace</cite> is not a scalar, array-like, <code class="docutils literal notranslate"><span class="pre">dict</span></code>, or <code class="docutils literal notranslate"><span class="pre">None</span></code>.</li>
<li>If <cite>to_replace</cite> is a <code class="docutils literal notranslate"><span class="pre">dict</span></code> and <cite>value</cite> is not a <code class="docutils literal notranslate"><span class="pre">list</span></code>,
<code class="docutils literal notranslate"><span class="pre">dict</span></code>, <code class="docutils literal notranslate"><span class="pre">ndarray</span></code>, or <code class="docutils literal notranslate"><span class="pre">DeferredSeries</span></code>.</li>
<li>If <cite>to_replace</cite> is <code class="docutils literal notranslate"><span class="pre">None</span></code> and <cite>regex</cite> is not compilable
into a regular expression or is a list, dict, ndarray, or
DeferredSeries.</li>
<li>When replacing multiple <code class="docutils literal notranslate"><span class="pre">bool</span></code> or <code class="docutils literal notranslate"><span class="pre">datetime64</span></code> objects and
the arguments to <cite>to_replace</cite> do not match the type of the
value being replaced.</li>
</ul>
</li>
<li><p class="first"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> – If a <code class="docutils literal notranslate"><span class="pre">list</span></code> or an <code class="docutils literal notranslate"><span class="pre">ndarray</span></code> is passed to <cite>to_replace</cite> and <cite>value</cite> but they are not the same length.</p>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">method</span></code> is not supported in the Beam DataFrame API because it is
order-sensitive. It cannot be specified.</p>
<p>If <code class="docutils literal notranslate"><span class="pre">limit</span></code> is specified this operation is not parallelizable.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.fillna" title="apache_beam.dataframe.frames.DeferredDataFrame.fillna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.fillna()</span></code></a></dt>
<dd>Fill NA values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.where" title="apache_beam.dataframe.frames.DeferredDataFrame.where"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.where()</span></code></a></dt>
<dd>Replace values based on boolean condition.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.str.replace()</span></code></dt>
<dd>Simple string replacement.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<ul class="simple">
<li><dl class="first docutils">
<dt>Regex substitution is performed under the hood with <code class="docutils literal notranslate"><span class="pre">re.sub</span></code>. The</dt>
<dd>rules for substitution for <code class="docutils literal notranslate"><span class="pre">re.sub</span></code> are the same.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>Regular expressions will only substitute on strings, meaning you</dt>
<dd>cannot provide, for example, a regular expression matching floating
point numbers and expect the columns in your frame that have a
numeric dtype to be matched. However, if those floating point
numbers <em>are</em> strings, then you can do this.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>This method has <em>a lot</em> of options. You are encouraged to experiment</dt>
<dd>and play with this method to gain intuition about how it works.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>When a dict is used as the <cite>to_replace</cite> value, the key(s) in the</dt>
<dd>dict act as the <cite>to_replace</cite> part and the value(s) in the dict
act as the <cite>value</cite> parameter.</dd>
</dl>
</li>
</ul>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">**Scalar `to_replace` and `value`**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
<span class="go">0 5</span>
<span class="go">1 1</span>
<span class="go">2 2</span>
<span class="go">3 3</span>
<span class="go">4 4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 5 5 a</span>
<span class="go">1 1 6 b</span>
<span class="go">2 2 7 c</span>
<span class="go">3 3 8 d</span>
<span class="go">4 4 9 e</span>
<span class="go">**List-like `to_replace`**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="mi">4</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 4 5 a</span>
<span class="go">1 4 6 b</span>
<span class="go">2 4 7 c</span>
<span class="go">3 4 8 d</span>
<span class="go">4 4 9 e</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span>
<span class="go"> A B C</span>
<span class="go">0 4 5 a</span>
<span class="go">1 3 6 b</span>
<span class="go">2 2 7 c</span>
<span class="go">3 1 8 d</span>
<span class="go">4 4 9 e</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">replace</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">method</span><span class="o">=</span><span class="s1">&#39;bfill&#39;</span><span class="p">)</span>
<span class="go">0 0</span>
<span class="go">1 3</span>
<span class="go">2 3</span>
<span class="go">3 3</span>
<span class="go">4 4</span>
<span class="go">dtype: int64</span>
<span class="go">**dict-like `to_replace`**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">({</span><span class="mi">0</span><span class="p">:</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="mi">100</span><span class="p">})</span>
<span class="go"> A B C</span>
<span class="go">0 10 5 a</span>
<span class="go">1 100 6 b</span>
<span class="go">2 2 7 c</span>
<span class="go">3 3 8 d</span>
<span class="go">4 4 9 e</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">},</span> <span class="mi">100</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 100 100 a</span>
<span class="go">1 1 6 b</span>
<span class="go">2 2 7 c</span>
<span class="go">3 3 8 d</span>
<span class="go">4 4 9 e</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">4</span><span class="p">:</span> <span class="mi">400</span><span class="p">}})</span>
<span class="go"> A B C</span>
<span class="go">0 100 5 a</span>
<span class="go">1 1 6 b</span>
<span class="go">2 2 7 c</span>
<span class="go">3 3 8 d</span>
<span class="go">4 400 9 e</span>
<span class="go">**Regular expression `to_replace`**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;bat&#39;</span><span class="p">,</span> <span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="s1">&#39;bait&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;abc&#39;</span><span class="p">,</span> <span class="s1">&#39;bar&#39;</span><span class="p">,</span> <span class="s1">&#39;xyz&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">to_replace</span><span class="o">=</span><span class="sa">r</span><span class="s1">&#39;^ba.$&#39;</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="s1">&#39;new&#39;</span><span class="p">,</span> <span class="n">regex</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 new abc</span>
<span class="go">1 foo new</span>
<span class="go">2 bait xyz</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="sa">r</span><span class="s1">&#39;^ba.$&#39;</span><span class="p">},</span> <span class="p">{</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="s1">&#39;new&#39;</span><span class="p">},</span> <span class="n">regex</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 new abc</span>
<span class="go">1 foo bar</span>
<span class="go">2 bait xyz</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">regex</span><span class="o">=</span><span class="sa">r</span><span class="s1">&#39;^ba.$&#39;</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="s1">&#39;new&#39;</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 new abc</span>
<span class="go">1 foo new</span>
<span class="go">2 bait xyz</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">regex</span><span class="o">=</span><span class="p">{</span><span class="sa">r</span><span class="s1">&#39;^ba.$&#39;</span><span class="p">:</span> <span class="s1">&#39;new&#39;</span><span class="p">,</span> <span class="s1">&#39;foo&#39;</span><span class="p">:</span> <span class="s1">&#39;xyz&#39;</span><span class="p">})</span>
<span class="go"> A B</span>
<span class="go">0 new abc</span>
<span class="go">1 xyz new</span>
<span class="go">2 bait xyz</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">regex</span><span class="o">=</span><span class="p">[</span><span class="sa">r</span><span class="s1">&#39;^ba.$&#39;</span><span class="p">,</span> <span class="s1">&#39;foo&#39;</span><span class="p">],</span> <span class="n">value</span><span class="o">=</span><span class="s1">&#39;new&#39;</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 new abc</span>
<span class="go">1 new new</span>
<span class="go">2 bait xyz</span>
<span class="go">Compare the behavior of ``s.replace({&#39;a&#39;: None})`` and</span>
<span class="go">``s.replace(&#39;a&#39;, None)`` to understand the peculiarities</span>
<span class="go">of the `to_replace` parameter:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">10</span><span class="p">,</span> <span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;a&#39;</span><span class="p">])</span>
<span class="go">When one uses a dict as the `to_replace` value, it is like the</span>
<span class="go">value(s) in the dict are equal to the `value` parameter.</span>
<span class="go">``s.replace({&#39;a&#39;: None})`` is equivalent to</span>
<span class="go">``s.replace(to_replace={&#39;a&#39;: None}, value=None, method=None)``:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">replace</span><span class="p">({</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="kc">None</span><span class="p">})</span>
<span class="go">0 10</span>
<span class="go">1 None</span>
<span class="go">2 None</span>
<span class="go">3 b</span>
<span class="go">4 None</span>
<span class="go">dtype: object</span>
<span class="go">When ``value=None`` and `to_replace` is a scalar, list or</span>
<span class="go">tuple, `replace` uses the method parameter (default &#39;pad&#39;) to do the</span>
<span class="go">replacement. So this is why the &#39;a&#39; values are being replaced by 10</span>
<span class="go">in rows 1 and 2 and &#39;b&#39; in row 4 in this case.</span>
<span class="go">The command ``s.replace(&#39;a&#39;, None)`` is actually equivalent to</span>
<span class="go">``s.replace(to_replace=&#39;a&#39;, value=None, method=&#39;pad&#39;)``:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="go">0 10</span>
<span class="go">1 10</span>
<span class="go">2 10</span>
<span class="go">3 b</span>
<span class="go">4 b</span>
<span class="go">dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.resample">
<code class="descname">resample</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.resample" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.resample.html#pandas.DataFrame.resample" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.resample()</span></code></a> is not yet supported in the Beam DataFrame API because implementing it would require integrating with Beam event-time semantics.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-event-time-semantics">https://s.apache.org/dataframe-event-time-semantics</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.reset_index">
<code class="descname">reset_index</code><span class="sig-paren">(</span><em>level=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.reset_index" title="Permalink to this definition"></a></dt>
<dd><p>Reset the index, or a level of it.</p>
<p>Reset the index of the DataFrame, and use the default one instead.
If the DataFrame has a MultiIndex, this method can remove one or more
levels.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.10)"><em>tuple</em></a><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><em>default None</em>) – Only remove the given levels from the index. Removes all levels by
default.</li>
<li><strong>drop</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Do not try to insert index into dataframe columns. This resets
the index to the default integer index.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Modify the DeferredDataFrame in place (do not create a new object).</li>
<li><strong>col_level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default 0</em>) – If the columns have multiple levels, determines which level the
labels are inserted into. By default it is inserted into the first
level.</li>
<li><strong>col_fill</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><em>object</em></a><em>, </em><em>default ''</em>) – If the columns have multiple levels, determines how the other
levels are named. If None then the index name is repeated.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredDataFrame with the new index or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Dropping the entire index (e.g. with <code class="docutils literal notranslate"><span class="pre">reset_index(level=None)</span></code>) is
not parallelizable. In addition, the only guarantee is that the newly
generated index values will be unique: the Beam DataFrame API does not
guarantee that they match the index values the equivalent pandas operation
would generate, because the pandas implementation is order-sensitive.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.set_index" title="apache_beam.dataframe.frames.DeferredDataFrame.set_index"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.set_index()</span></code></a></dt>
<dd>Opposite of reset_index.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.reindex" title="apache_beam.dataframe.frames.DeferredDataFrame.reindex"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.reindex()</span></code></a></dt>
<dd>Change to new indices or expand indices.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.reindex_like" title="apache_beam.dataframe.frames.DeferredDataFrame.reindex_like"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.reindex_like()</span></code></a></dt>
<dd>Change to same indices as other DeferredDataFrame.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([(</span><span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="mf">389.0</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="mf">24.0</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="mf">80.5</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;parrot&#39;</span><span class="p">,</span> <span class="s1">&#39;lion&#39;</span><span class="p">,</span> <span class="s1">&#39;monkey&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;class&#39;</span><span class="p">,</span> <span class="s1">&#39;max_speed&#39;</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> class max_speed</span>
<span class="go">falcon bird 389.0</span>
<span class="go">parrot bird 24.0</span>
<span class="go">lion mammal 80.5</span>
<span class="go">monkey mammal NaN</span>
<span class="go">When we reset the index, the old index is added as a column, and a</span>
<span class="go">new sequential index is used:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">reset_index</span><span class="p">()</span>
<span class="go"> index class max_speed</span>
<span class="go">0 falcon bird 389.0</span>
<span class="go">1 parrot bird 24.0</span>
<span class="go">2 lion mammal 80.5</span>
<span class="go">3 monkey mammal NaN</span>
<span class="go">We can use the `drop` parameter to avoid the old index being added as</span>
<span class="go">a column:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">drop</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> class max_speed</span>
<span class="go">0 bird 389.0</span>
<span class="go">1 bird 24.0</span>
<span class="go">2 mammal 80.5</span>
<span class="go">3 mammal NaN</span>
<span class="go">You can also use `reset_index` with `MultiIndex`.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">index</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_tuples</span><span class="p">([(</span><span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="s1">&#39;parrot&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="s1">&#39;lion&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="s1">&#39;monkey&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;class&#39;</span><span class="p">,</span> <span class="s1">&#39;name&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">columns</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_tuples</span><span class="p">([(</span><span class="s1">&#39;speed&#39;</span><span class="p">,</span> <span class="s1">&#39;max&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;species&#39;</span><span class="p">,</span> <span class="s1">&#39;type&#39;</span><span class="p">)])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([(</span><span class="mf">389.0</span><span class="p">,</span> <span class="s1">&#39;fly&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span> <span class="mf">24.0</span><span class="p">,</span> <span class="s1">&#39;fly&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span> <span class="mf">80.5</span><span class="p">,</span> <span class="s1">&#39;run&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="s1">&#39;jump&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="n">columns</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> speed species</span>
<span class="go"> max type</span>
<span class="go">class name</span>
<span class="go">bird falcon 389.0 fly</span>
<span class="go"> parrot 24.0 fly</span>
<span class="go">mammal lion 80.5 run</span>
<span class="go"> monkey NaN jump</span>
<span class="go">If the index has multiple levels, we can reset a subset of them:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="s1">&#39;class&#39;</span><span class="p">)</span>
<span class="go"> class speed species</span>
<span class="go"> max type</span>
<span class="go">name</span>
<span class="go">falcon bird 389.0 fly</span>
<span class="go">parrot bird 24.0 fly</span>
<span class="go">lion mammal 80.5 run</span>
<span class="go">monkey mammal NaN jump</span>
<span class="go">If we are not dropping the index, by default, it is placed in the top</span>
<span class="go">level. We can place it in another level:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="s1">&#39;class&#39;</span><span class="p">,</span> <span class="n">col_level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> speed species</span>
<span class="go"> class max type</span>
<span class="go">name</span>
<span class="go">falcon bird 389.0 fly</span>
<span class="go">parrot bird 24.0 fly</span>
<span class="go">lion mammal 80.5 run</span>
<span class="go">monkey mammal NaN jump</span>
<span class="go">When the index is inserted under another level, we can specify under</span>
<span class="go">which one with the parameter `col_fill`:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="s1">&#39;class&#39;</span><span class="p">,</span> <span class="n">col_level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">col_fill</span><span class="o">=</span><span class="s1">&#39;species&#39;</span><span class="p">)</span>
<span class="go"> species speed species</span>
<span class="go"> class max type</span>
<span class="go">name</span>
<span class="go">falcon bird 389.0 fly</span>
<span class="go">parrot bird 24.0 fly</span>
<span class="go">lion mammal 80.5 run</span>
<span class="go">monkey mammal NaN jump</span>
<span class="go">If we specify a nonexistent level for `col_fill`, it is created:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="s1">&#39;class&#39;</span><span class="p">,</span> <span class="n">col_level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">col_fill</span><span class="o">=</span><span class="s1">&#39;genus&#39;</span><span class="p">)</span>
<span class="go"> genus speed species</span>
<span class="go"> class max type</span>
<span class="go">name</span>
<span class="go">falcon bird 389.0 fly</span>
<span class="go">parrot bird 24.0 fly</span>
<span class="go">lion mammal 80.5 run</span>
<span class="go">monkey mammal NaN jump</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.rfloordiv">
<code class="descname">rfloordiv</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.rfloordiv" title="Permalink to this definition"></a></dt>
<dd><p>Return Integer division of series and other, element-wise (binary operator <cite>rfloordiv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">//</span> <span class="pre">series</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.floordiv" title="apache_beam.dataframe.frames.DeferredSeries.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.floordiv()</span></code></a></dt>
<dd>Element-wise Integer division, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">floordiv</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">c NaN</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.rmod">
<code class="descname">rmod</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.rmod" title="Permalink to this definition"></a></dt>
<dd><p>Return Modulo of series and other, element-wise (binary operator <cite>rmod</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">%</span> <span class="pre">series</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.mod" title="apache_beam.dataframe.frames.DeferredSeries.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.mod()</span></code></a></dt>
<dd>Element-wise Modulo, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">mod</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 0.0</span>
<span class="go">b NaN</span>
<span class="go">c NaN</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.rmul">
<code class="descname">rmul</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.rmul" title="Permalink to this definition"></a></dt>
<dd><p>Return Multiplication of series and other, element-wise (binary operator <cite>rmul</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">*</span> <span class="pre">series</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.mul" title="apache_beam.dataframe.frames.DeferredSeries.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.mul()</span></code></a></dt>
<dd>Element-wise Multiplication, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">multiply</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 1.0</span>
<span class="go">b 0.0</span>
<span class="go">c 0.0</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.rolling">
<code class="descname">rolling</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.rolling" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.rolling.html#pandas.Series.rolling" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.rolling()</span></code></a> is not yet supported in the Beam DataFrame API because implementing it would require integrating with Beam event-time semantics.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-event-time-semantics">https://s.apache.org/dataframe-event-time-semantics</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.rpow">
<code class="descname">rpow</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.rpow" title="Permalink to this definition"></a></dt>
<dd><p>Return Exponential power of series and other, element-wise (binary operator <cite>rpow</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">**</span> <span class="pre">series</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.pow" title="apache_beam.dataframe.frames.DeferredSeries.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.pow()</span></code></a></dt>
<dd>Element-wise Exponential power, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">pow</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.rsub">
<code class="descname">rsub</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.rsub" title="Permalink to this definition"></a></dt>
<dd><p>Return Subtraction of series and other, element-wise (binary operator <cite>rsub</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">-</span> <span class="pre">series</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sub" title="apache_beam.dataframe.frames.DeferredSeries.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sub()</span></code></a></dt>
<dd>Element-wise Subtraction, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">subtract</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 0.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d -1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.rtruediv">
<code class="descname">rtruediv</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.rtruediv" title="Permalink to this definition"></a></dt>
<dd><p>Return Floating division of series and other, element-wise (binary operator <cite>rtruediv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">/</span> <span class="pre">series</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.truediv" title="apache_beam.dataframe.frames.DeferredSeries.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.truediv()</span></code></a></dt>
<dd>Element-wise Floating division, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">divide</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 1.0</span>
<span class="go">b inf</span>
<span class="go">c inf</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.set_flags">
<code class="descname">set_flags</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.set_flags" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.set_flags.html#pandas.Series.set_flags" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.set_flags()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘set_flags’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.size">
<code class="descname">size</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.size" title="Permalink to this definition"></a></dt>
<dd><p>Return an int representing the number of elements in this object.</p>
<p>Return the number of rows if Series. Otherwise return the number of
rows times number of columns if DataFrame.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">ndarray.size</span></code></dt>
<dd>Number of elements in the array.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">({</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="mi">3</span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">size</span>
<span class="go">3</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">size</span>
<span class="go">4</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.sort_index">
<code class="descname">sort_index</code><span class="sig-paren">(</span><em>axis</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.sort_index" title="Permalink to this definition"></a></dt>
<dd><p>Sort object by labels (along an axis).</p>
<p>Returns a new DeferredDataFrame sorted by label if the <cite>inplace</cite> argument is
<code class="docutils literal notranslate"><span class="pre">False</span></code>; otherwise updates the original DeferredDataFrame and returns None.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – The axis along which to sort. The value 0 identifies the rows,
and 1 identifies the columns.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em> or </em><em>list of ints</em><em> or </em><em>list of level names</em>) – If not None, sort on values in specified index level(s).</li>
<li><strong>ascending</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em> or </em><em>list-like of bools</em><em>, </em><em>default True</em>) – Sort ascending vs. descending. When the index is a MultiIndex the
sort direction can be controlled for each level individually.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, perform operation in-place.</li>
<li><strong>kind</strong> (<em>{'quicksort'</em><em>, </em><em>'mergesort'</em><em>, </em><em>'heapsort'</em><em>, </em><em>'stable'}</em><em>, </em><em>default 'quicksort'</em>) – Choice of sorting algorithm. See also <a class="reference external" href="https://numpy.org/doc/stable/reference/generated/numpy.sort.html#numpy.sort" title="(in NumPy v1.22)"><code class="xref py py-func docutils literal notranslate"><span class="pre">numpy.sort()</span></code></a> for more
information. <cite>mergesort</cite> and <cite>stable</cite> are the only stable algorithms. For
DeferredDataFrames, this option is only applied when sorting on a single
column or label.</li>
<li><strong>na_position</strong> (<em>{'first'</em><em>, </em><em>'last'}</em><em>, </em><em>default 'last'</em>) – Puts NaNs at the beginning if <cite>first</cite>; <cite>last</cite> puts NaNs at the end.
Not implemented for MultiIndex.</li>
<li><strong>sort_remaining</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – If True and sorting by level and index is multilevel, sort by other
levels too (in order) after sorting by specified level.</li>
<li><strong>ignore_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>If True, the resulting axis will be labeled 0, 1, …, n - 1.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.0.0.</span></p>
</div>
</li>
<li><strong>key</strong> (<em>callable</em><em>, </em><em>optional</em>) – <p>If not None, apply the key function to the index values
before sorting. This is similar to the <cite>key</cite> argument in the
builtin <code class="xref py py-meth docutils literal notranslate"><span class="pre">sorted()</span></code> function, with the notable difference that
this <cite>key</cite> function should be <em>vectorized</em>. It should expect an
<code class="docutils literal notranslate"><span class="pre">Index</span></code> and return an <code class="docutils literal notranslate"><span class="pre">Index</span></code> of the same shape. For MultiIndex
inputs, the key is applied <em>per level</em>.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The original DeferredDataFrame sorted by the labels or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">axis=index</span></code> is not allowed because it imposes an ordering on the
dataset, and we cannot guarantee it will be maintained (see
<a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>). Only
<code class="docutils literal notranslate"><span class="pre">axis=columns</span></code> is allowed.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sort_index" title="apache_beam.dataframe.frames.DeferredSeries.sort_index"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sort_index()</span></code></a></dt>
<dd>Sort DeferredSeries by the index.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sort_values" title="apache_beam.dataframe.frames.DeferredDataFrame.sort_values"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sort_values()</span></code></a></dt>
<dd>Sort DeferredDataFrame by the value.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sort_values" title="apache_beam.dataframe.frames.DeferredSeries.sort_values"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sort_values()</span></code></a></dt>
<dd>Sort DeferredSeries by the value.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">29</span><span class="p">,</span> <span class="mi">234</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">150</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sort_index</span><span class="p">()</span>
<span class="go"> A</span>
<span class="go">1 4</span>
<span class="go">29 2</span>
<span class="go">100 1</span>
<span class="go">150 5</span>
<span class="go">234 3</span>
</pre></div>
</div>
<p>By default, it sorts in ascending order; to sort in descending order,
use <code class="docutils literal notranslate"><span class="pre">ascending=False</span></code>.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sort_index</span><span class="p">(</span><span class="n">ascending</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go"> A</span>
<span class="go">234 3</span>
<span class="go">150 5</span>
<span class="go">100 1</span>
<span class="go">29 2</span>
<span class="go">1 4</span>
</pre></div>
</div>
<p>A key function can be specified which is applied to the index before
sorting. For a <code class="docutils literal notranslate"><span class="pre">MultiIndex</span></code> this is applied to each level separately.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;a&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sort_index</span><span class="p">(</span><span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">str</span><span class="o">.</span><span class="n">lower</span><span class="p">())</span>
<span class="go"> a</span>
<span class="go">A 1</span>
<span class="go">b 2</span>
<span class="go">C 3</span>
<span class="go">d 4</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.sort_values">
<code class="descname">sort_values</code><span class="sig-paren">(</span><em>axis</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.sort_values" title="Permalink to this definition"></a></dt>
<dd><p><code class="docutils literal notranslate"><span class="pre">sort_values</span></code> is not implemented.</p>
<p>It is not implemented for <code class="docutils literal notranslate"><span class="pre">axis=index</span></code> because it imposes an ordering on
the dataset, and it likely will not be maintained (see
<a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>).</p>
<p>It is not implemented for <code class="docutils literal notranslate"><span class="pre">axis=columns</span></code> because it makes the order of
the columns depend on the data (see
<a class="reference external" href="https://s.apache.org/dataframe-non-deferred-columns">https://s.apache.org/dataframe-non-deferred-columns</a>).</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredSeries.sparse">
<code class="descname">sparse</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.sparse" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.sparse.html#pandas.DataFrame.sparse" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.sparse()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘sparse’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-12425">BEAM-12425</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.squeeze">
<code class="descname">squeeze</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.squeeze" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.squeeze.html#pandas.Series.squeeze" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.squeeze()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘squeeze’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.sub">
<code class="descname">sub</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.sub" title="Permalink to this definition"></a></dt>
<dd><p>Return the subtraction of series and other, element-wise (binary operator <cite>sub</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">-</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rsub" title="apache_beam.dataframe.frames.DeferredSeries.rsub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rsub()</span></code></a></dt>
<dd>Reverse of the subtraction operator; see the <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">subtract</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 0.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d -1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.subtract">
<code class="descname">subtract</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.subtract" title="Permalink to this definition"></a></dt>
<dd><p>Return the subtraction of series and other, element-wise (binary operator <cite>sub</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">-</span> <span class="pre">other</span></code>, but with support to substitute a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rsub" title="apache_beam.dataframe.frames.DeferredSeries.rsub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rsub()</span></code></a></dt>
<dd>Reverse of the subtraction operator; see the <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">subtract</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 0.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d -1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.swapaxes">
<code class="descname">swapaxes</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.swapaxes" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.swapaxes.html#pandas.Series.swapaxes" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.swapaxes()</span></code></a> is not yet supported in the Beam DataFrame API because the columns in the output DataFrame depend on the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-columns">https://s.apache.org/dataframe-non-deferred-columns</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.swaplevel">
<code class="descname">swaplevel</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.swaplevel" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.swaplevel.html#pandas.Series.swaplevel" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.swaplevel()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘swaplevel’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_clipboard">
<code class="descname">to_clipboard</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_clipboard" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_clipboard.html#pandas.DataFrame.to_clipboard" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_clipboard()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_clipboard’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_csv">
<code class="descname">to_csv</code><span class="sig-paren">(</span><em>path</em>, <em>transform_label=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_csv" title="Permalink to this definition"></a></dt>
<dd><p>Write object to a comma-separated values (csv) file.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>path_or_buf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>file handle</em><em>, </em><em>default None</em>) – <p>File path or object, if None is provided the result is returned as
a string. If a non-binary file object is passed, it should be opened
with <cite>newline=''</cite>, disabling universal newlines. If a binary
file object is passed, <cite>mode</cite> might need to contain a <cite>'b'</cite>.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.2.0: </span>Support for binary file objects was introduced.</p>
</div>
</li>
<li><strong>sep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default '</em><em>,</em><em>'</em>) – String of length 1. Field delimiter for the output file.</li>
<li><strong>na_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default ''</em>) – Missing data representation.</li>
<li><strong>float_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default None</em>) – Format string for floating point numbers.</li>
<li><strong>columns</strong> (<em>sequence</em><em>, </em><em>optional</em>) – Columns to write.</li>
<li><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em> or </em><em>list of str</em><em>, </em><em>default True</em>) – Write out the column names. If a list of strings is given it is
assumed to be aliases for the column names.</li>
<li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Write row names (index).</li>
<li><strong>index_label</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>sequence</em><em>, or </em><em>False</em><em>, </em><em>default None</em>) – Column label for index column(s) if desired. If None is given, and
<cite>header</cite> and <cite>index</cite> are True, then the index names are used. A
sequence should be given if the object uses MultiIndex. If
False do not print fields for index names. Use index_label=False
for easier importing in R.</li>
<li><strong>mode</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – Python write mode, default ‘w’.</li>
<li><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – A string representing the encoding to use in the output file,
defaults to ‘utf-8’. <cite>encoding</cite> is not supported if <cite>path_or_buf</cite>
is a non-binary file object.</li>
<li><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>If str, represents compression mode. If dict, value at ‘method’ is
the compression mode. Compression mode may be any of the following
possible values: {‘infer’, ‘gzip’, ‘bz2’, ‘zip’, ‘xz’, None}. If
compression mode is ‘infer’ and <cite>path_or_buf</cite> is path-like, then
detect compression mode from the following extensions: ‘.gz’,
‘.bz2’, ‘.zip’ or ‘.xz’ (otherwise no compression). If a dict is given
and mode is one of {‘zip’, ‘gzip’, ‘bz2’}, or inferred as
one of the above, other entries are passed as
additional compression options.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.0.0: </span>May now be a dict with key ‘method’ as compression mode
and other entries as additional compression options if
compression mode is ‘zip’.</p>
</div>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.1.0: </span>Passing compression options as keys in dict is
supported for compression modes ‘gzip’ and ‘bz2’
as well as ‘zip’.</p>
</div>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.2.0: </span>Compression is supported for binary file objects.</p>
</div>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.2.0: </span>Previous versions forwarded dict entries for ‘gzip’ to
<cite>gzip.open</cite> instead of <cite>gzip.GzipFile</cite> which prevented
setting <cite>mtime</cite>.</p>
</div>
</li>
<li><strong>quoting</strong> (<em>optional constant from csv module</em>) – Defaults to csv.QUOTE_MINIMAL. If you have set a <cite>float_format</cite>
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
will treat them as non-numeric.</li>
<li><strong>quotechar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default '&quot;'</em>) – String of length 1. Character used to quote fields.</li>
<li><strong>line_terminator</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – The newline character or character sequence to use in the output
file. Defaults to <cite>os.linesep</cite>, which depends on the OS in which
this method is called (e.g. ‘\n’ for Linux, ‘\r\n’ for Windows).</li>
<li><strong>chunksize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a>) – Rows to write at a time.</li>
<li><strong>date_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default None</em>) – Format string for datetime objects.</li>
<li><strong>doublequote</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Control quoting of <cite>quotechar</cite> inside a field.</li>
<li><strong>escapechar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default None</em>) – String of length 1. Character used to escape <cite>sep</cite> and <cite>quotechar</cite>
when appropriate.</li>
<li><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default '.'</em>) – Character recognized as decimal separator. E.g. use ‘,’ for
European data.</li>
<li><strong>errors</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default 'strict'</em>) – <p>Specifies how encoding and decoding errors are to be handled.
See the errors argument for <a class="reference external" href="https://docs.python.org/3/library/functions.html#open" title="(in Python v3.10)"><code class="xref py py-func docutils literal notranslate"><span class="pre">open()</span></code></a> for a full list
of options.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
<li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib</span></code> as header options. For other URLs (e.g.
starting with “s3://”, and “gcs://”) the key-value pairs are forwarded to
<code class="docutils literal notranslate"><span class="pre">fsspec</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more details.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.2.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If path_or_buf is None, returns the resulting csv format as a
string. Otherwise returns None.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a> or <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)">str</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">read_csv()</span></code></dt>
<dd>Load a CSV file into a DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.to_excel" title="apache_beam.dataframe.frames.DeferredSeries.to_excel"><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_excel()</span></code></a></dt>
<dd>Write DeferredDataFrame to an Excel file.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;Raphael&#39;</span><span class="p">,</span> <span class="s1">&#39;Donatello&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;mask&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;red&#39;</span><span class="p">,</span> <span class="s1">&#39;purple&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;weapon&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;sai&#39;</span><span class="p">,</span> <span class="s1">&#39;bo staff&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">&#39;name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n&#39;</span>
<span class="go">Create &#39;out.zip&#39; containing &#39;out.csv&#39;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">compression_opts</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="s1">&#39;zip&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">archive_name</span><span class="o">=</span><span class="s1">&#39;out.csv&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">&#39;out.zip&#39;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">compression</span><span class="o">=</span><span class="n">compression_opts</span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_excel">
<code class="descname">to_excel</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_excel" title="Permalink to this definition"></a></dt>
<dd><p>Write object to an Excel sheet.</p>
<p>To write a single object to an Excel .xlsx file it is only necessary to
specify a target file name. To write to multiple sheets it is necessary to
create an <cite>ExcelWriter</cite> object with a target file name, and specify a sheet
in the file to write to.</p>
<p>Multiple sheets may be written to by specifying unique <cite>sheet_name</cite>.
With all data written to the file it is necessary to save the changes.
Note that creating an <cite>ExcelWriter</cite> object with a file name that already
exists will result in the contents of the existing file being erased.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>excel_writer</strong> (<em>path-like</em><em>, </em><em>file-like</em><em>, or </em><em>ExcelWriter object</em>) – File path or existing ExcelWriter.</li>
<li><strong>sheet_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default 'Sheet1'</em>) – Name of sheet which will contain DeferredDataFrame.</li>
<li><strong>na_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default ''</em>) – Missing data representation.</li>
<li><strong>float_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – Format string for floating point numbers. For example
<code class="docutils literal notranslate"><span class="pre">float_format=&quot;%.2f&quot;</span></code> will format 0.1234 to 0.12.</li>
<li><strong>columns</strong> (<em>sequence</em><em> or </em><em>list of str</em><em>, </em><em>optional</em>) – Columns to write.</li>
<li><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em> or </em><em>list of str</em><em>, </em><em>default True</em>) – Write out the column names. If a list of strings is given it is
assumed to be aliases for the column names.</li>
<li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Write row names (index).</li>
<li><strong>index_label</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>sequence</em><em>, </em><em>optional</em>) – Column label for index column(s) if desired. If not specified, and
<cite>header</cite> and <cite>index</cite> are True, then the index names are used. A
sequence should be given if the DeferredDataFrame uses MultiIndex.</li>
<li><strong>startrow</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 0</em>) – Upper left cell row to dump data frame.</li>
<li><strong>startcol</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 0</em>) – Upper left cell column to dump data frame.</li>
<li><strong>engine</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – <p>Write engine to use, ‘openpyxl’ or ‘xlsxwriter’. You can also set this
via the options <code class="docutils literal notranslate"><span class="pre">io.excel.xlsx.writer</span></code>, <code class="docutils literal notranslate"><span class="pre">io.excel.xls.writer</span></code>, and
<code class="docutils literal notranslate"><span class="pre">io.excel.xlsm.writer</span></code>.</p>
<div class="deprecated">
<p><span class="versionmodified">Deprecated since version 1.2.0: </span>As the <a class="reference external" href="https://pypi.org/project/xlwt/">xlwt</a> package is no longer
maintained, the <code class="docutils literal notranslate"><span class="pre">xlwt</span></code> engine will be removed in a future version
of pandas.</p>
</div>
</li>
<li><strong>merge_cells</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Write MultiIndex and Hierarchical Rows as merged cells.</li>
<li><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – Encoding of the resulting excel file. Only necessary for xlwt,
other writers support unicode natively.</li>
<li><strong>inf_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default 'inf'</em>) – Representation for infinity (there is no native representation for
infinity in Excel).</li>
<li><strong>verbose</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Display more information in the error logs.</li>
<li><strong>freeze_panes</strong> (<em>tuple of int</em><em> (</em><em>length 2</em><em>)</em><em>, </em><em>optional</em>) – Specifies the one-based bottommost row and rightmost column that
is to be frozen.</li>
<li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib</span></code> as header options. For other URLs (e.g.
starting with “s3://”, and “gcs://”) the key-value pairs are forwarded to
<code class="docutils literal notranslate"><span class="pre">fsspec</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more details.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.2.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.to_csv" title="apache_beam.dataframe.frames.DeferredSeries.to_csv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_csv()</span></code></a></dt>
<dd>Write DeferredDataFrame to a comma-separated values (csv) file.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">ExcelWriter()</span></code></dt>
<dd>Class for writing DeferredDataFrame objects into excel sheets.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">read_excel()</span></code></dt>
<dd>Read an Excel file into a DeferredDataFrame.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">read_csv()</span></code></dt>
<dd>Read a comma-separated values (csv) file into DeferredDataFrame.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>For compatibility with <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_csv" title="apache_beam.dataframe.frames.DeferredDataFrame.to_csv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_csv()</span></code></a>,
to_excel serializes lists and dicts to strings before writing.</p>
<p>Once a workbook has been saved it is not possible to write further
data without rewriting the whole workbook.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Create, write to and save a workbook:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">],</span> <span class="p">[</span><span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;row 1&#39;</span><span class="p">,</span> <span class="s1">&#39;row 2&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;col 1&#39;</span><span class="p">,</span> <span class="s1">&#39;col 2&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="s2">&quot;output.xlsx&quot;</span><span class="p">)</span>
<span class="go">To specify the sheet name:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="s2">&quot;output.xlsx&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_1&#39;</span><span class="p">)</span>
<span class="go">If you wish to write to more than one sheet in the workbook, it is</span>
<span class="go">necessary to specify an ExcelWriter object:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">df1</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="k">with</span> <span class="n">pd</span><span class="o">.</span><span class="n">ExcelWriter</span><span class="p">(</span><span class="s1">&#39;output.xlsx&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">writer</span><span class="p">:</span>
<span class="gp">... </span> <span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="n">writer</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_1&#39;</span><span class="p">)</span>
<span class="gp">... </span> <span class="n">df2</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="n">writer</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_2&#39;</span><span class="p">)</span>
<span class="go">ExcelWriter can also be used to append to an existing Excel file:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="k">with</span> <span class="n">pd</span><span class="o">.</span><span class="n">ExcelWriter</span><span class="p">(</span><span class="s1">&#39;output.xlsx&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;a&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">writer</span><span class="p">:</span>
<span class="gp">... </span> <span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="n">writer</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_3&#39;</span><span class="p">)</span>
<span class="go">To set the library that is used to write the Excel file,</span>
<span class="go">you can pass the `engine` keyword (the default engine is</span>
<span class="go">automatically chosen depending on the file extension):</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="s1">&#39;output1.xlsx&#39;</span><span class="p">,</span> <span class="n">engine</span><span class="o">=</span><span class="s1">&#39;xlsxwriter&#39;</span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_feather">
<code class="descname">to_feather</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_feather" title="Permalink to this definition"></a></dt>
<dd><p>Write a DataFrame to the binary Feather format.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>file-like object</em>) – If a string, it will be used as Root Directory path.</li>
<li><strong>**kwargs</strong><p>Additional keywords passed to <code class="xref py py-func docutils literal notranslate"><span class="pre">pyarrow.feather.write_feather()</span></code>.
Starting with pyarrow 0.17, this includes the <cite>compression</cite>,
<cite>compression_level</cite>, <cite>chunksize</cite> and <cite>version</cite> keywords.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_hdf">
<code class="descname">to_hdf</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_hdf" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_hdf.html#pandas.DataFrame.to_hdf" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_hdf()</span></code></a> is not yet supported in the Beam DataFrame API because HDF5 is a random-access file format.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_html">
<code class="descname">to_html</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_html" title="Permalink to this definition"></a></dt>
<dd><p>Render a DataFrame as an HTML table.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>buf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>Path</em><em> or </em><em>StringIO-like</em><em>, </em><em>optional</em><em>, </em><em>default None</em>) – Buffer to write to. If None, the output is returned as a string.</li>
<li><strong>columns</strong> (<em>sequence</em><em>, </em><em>optional</em><em>, </em><em>default None</em>) – The subset of columns to write. Writes all columns by default.</li>
<li><strong>col_space</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em> or </em><em>dict of int</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – <p>The minimum width of each column in CSS length units. An int is assumed to be px units.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 0.25.0: </span>Ability to use str.</p>
</div>
</li>
<li><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>optional</em>) – Whether to print column labels, default True.</li>
<li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>optional</em><em>, </em><em>default True</em>) – Whether to print index (row) labels.</li>
<li><strong>na_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em><em>, </em><em>default 'NaN'</em>) – String representation of <code class="docutils literal notranslate"><span class="pre">NaN</span></code> to use.</li>
<li><strong>formatters</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.10)"><em>tuple</em></a><em> or </em><em>dict of one-param. functions</em><em>, </em><em>optional</em>) – Formatter functions to apply to columns’ elements by position or
name.
The result of each function must be a unicode string.
List/tuple must be of length equal to the number of columns.</li>
<li><strong>float_format</strong> (<em>one-parameter function</em><em>, </em><em>optional</em><em>, </em><em>default None</em>) – <p>Formatter function to apply to columns’ elements if they are
floats. This function must return a unicode string and will be
applied only to the non-<code class="docutils literal notranslate"><span class="pre">NaN</span></code> elements, with <code class="docutils literal notranslate"><span class="pre">NaN</span></code> being
handled by <code class="docutils literal notranslate"><span class="pre">na_rep</span></code>.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.2.0.</span></p>
</div>
</li>
<li><strong>sparsify</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>optional</em><em>, </em><em>default True</em>) – Set to False for a DeferredDataFrame with a hierarchical index to print
every multiindex key at each row.</li>
<li><strong>index_names</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>optional</em><em>, </em><em>default True</em>) – Prints the names of the indexes.</li>
<li><strong>justify</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default None</em>) – <p>How to justify the column labels. If None uses the option from
the print configuration (controlled by set_option), ‘right’ out
of the box. Valid values are</p>
<ul>
<li>left</li>
<li>right</li>
<li>center</li>
<li>justify</li>
<li>justify-all</li>
<li>start</li>
<li>end</li>
<li>inherit</li>
<li>match-parent</li>
<li>initial</li>
<li>unset.</li>
</ul>
</li>
<li><strong>max_rows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – Maximum number of rows to display in the console.</li>
<li><strong>min_rows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – The number of rows to display in the console in a truncated repr
(when number of rows is above <cite>max_rows</cite>).</li>
<li><strong>max_cols</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – Maximum number of columns to display in the console.</li>
<li><strong>show_dimensions</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Display DeferredDataFrame dimensions (number of rows by number of columns).</li>
<li><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default '.'</em>) – Character recognized as decimal separator, e.g. ‘,’ in Europe.</li>
<li><strong>bold_rows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Make the row labels bold in the output.</li>
<li><strong>classes</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.10)"><em>tuple</em></a><em>, </em><em>default None</em>) – CSS class(es) to apply to the resulting html table.</li>
<li><strong>escape</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Convert the characters &lt;, &gt;, and &amp; to HTML-safe sequences.</li>
<li><strong>notebook</strong> (<em>{True</em><em>, </em><em>False}</em><em>, </em><em>default False</em>) – Whether the generated HTML is for IPython Notebook.</li>
<li><strong>border</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – A <code class="docutils literal notranslate"><span class="pre">border=border</span></code> attribute is included in the opening
<cite>&lt;table&gt;</cite> tag. Default <code class="docutils literal notranslate"><span class="pre">pd.options.display.html.border</span></code>.</li>
<li><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default &quot;utf-8&quot;</em>) – <p>Set character encoding.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.0.</span></p>
</div>
</li>
<li><strong>table_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – A css id is included in the opening <cite>&lt;table&gt;</cite> tag if specified.</li>
<li><strong>render_links</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Convert URLs to HTML links.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If buf is None, returns the result as a string. Otherwise returns
None.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)">str</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.to_string" title="apache_beam.dataframe.frames.DeferredSeries.to_string"><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_string()</span></code></a></dt>
<dd>Convert DeferredDataFrame to a string.</dd>
</dl>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_json">
<code class="descname">to_json</code><span class="sig-paren">(</span><em>path</em>, <em>orient=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_json" title="Permalink to this definition"></a></dt>
<dd><p>Convert the object to a JSON string.</p>
<p>Note NaN’s and None will be converted to null and datetime objects
will be converted to UNIX timestamps.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>path_or_buf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>file handle</em><em>, </em><em>optional</em>) – File path or object. If not specified, the result is returned as
a string.</li>
<li><strong>orient</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – <p>Indication of expected JSON string format.</p>
<ul>
<li>DeferredSeries:<blockquote>
<div><ul>
<li>default is ‘index’</li>
<li>allowed values are: {‘split’, ‘records’, ‘index’, ‘table’}.</li>
</ul>
</div></blockquote>
</li>
<li>DeferredDataFrame:<blockquote>
<div><ul>
<li>default is ‘columns’</li>
<li>allowed values are: {‘split’, ‘records’, ‘index’, ‘columns’,
‘values’, ‘table’}.</li>
</ul>
</div></blockquote>
</li>
<li>The format of the JSON string:<blockquote>
<div><ul>
<li>‘split’ : dict like {‘index’ -&gt; [index], ‘columns’ -&gt; [columns],
‘data’ -&gt; [values]}</li>
<li>‘records’ : list like [{column -&gt; value}, … , {column -&gt; value}]</li>
<li>‘index’ : dict like {index -&gt; {column -&gt; value}}</li>
<li>‘columns’ : dict like {column -&gt; {index -&gt; value}}</li>
<li>‘values’ : just the values array</li>
<li>‘table’ : dict like {‘schema’: {schema}, ‘data’: {data}}</li>
</ul>
<p>Describing the data, where data component is like <code class="docutils literal notranslate"><span class="pre">orient='records'</span></code>.</p>
</div></blockquote>
</li>
</ul>
</li>
<li><strong>date_format</strong> (<em>{None</em><em>, </em><em>'epoch'</em><em>, </em><em>'iso'}</em>) – Type of date conversion. ‘epoch’ = epoch milliseconds,
‘iso’ = ISO8601. The default depends on the <cite>orient</cite>. For
<code class="docutils literal notranslate"><span class="pre">orient='table'</span></code>, the default is ‘iso’. For all other orients,
the default is ‘epoch’.</li>
<li><strong>double_precision</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 10</em>) – The number of decimal places to use when encoding
floating point values.</li>
<li><strong>force_ascii</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Force encoded string to be ASCII.</li>
<li><strong>date_unit</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default 'ms'</em><em> (</em><em>milliseconds</em><em>)</em>) – The time unit to encode to, governs timestamp and ISO8601
precision. One of ‘s’, ‘ms’, ‘us’, ‘ns’ for second, millisecond,
microsecond, and nanosecond respectively.</li>
<li><strong>default_handler</strong> (<em>callable</em><em>, </em><em>default None</em>) – Handler to call if object cannot otherwise be converted to a
suitable format for JSON. Should receive a single argument which is
the object to convert and return a serialisable object.</li>
<li><strong>lines</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If ‘orient’ is ‘records’, write out line-delimited JSON format. Raises
ValueError for any other ‘orient’, since the other formats are not
list-like.</li>
<li><strong>compression</strong> (<em>{'infer'</em><em>, </em><em>'gzip'</em><em>, </em><em>'bz2'</em><em>, </em><em>'zip'</em><em>, </em><em>'xz'</em><em>, </em><em>None}</em>) – A string representing the compression to use in the output file,
only used when the first argument is a filename. By default, the
compression is inferred from the filename.</li>
<li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Whether to include the index values in the JSON string. Not
including the index (<code class="docutils literal notranslate"><span class="pre">index=False</span></code>) is only supported when
orient is ‘split’ or ‘table’.</li>
<li><strong>indent</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – <p>Length of whitespace used to indent each record.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.0.0.</span></p>
</div>
</li>
<li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib</span></code> as header options. For other URLs (e.g.
starting with “s3://” or “gcs://”) the key-value pairs are forwarded to
<code class="docutils literal notranslate"><span class="pre">fsspec</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more details.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.2.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If path_or_buf is None, returns the resulting json format as a
string. Otherwise returns None.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a> or <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)">str</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">read_json()</span></code></dt>
<dd>Convert a JSON string to pandas object.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>The behavior of <code class="docutils literal notranslate"><span class="pre">indent=0</span></code> varies from the stdlib, which does not
indent the output but does insert newlines. Currently, <code class="docutils literal notranslate"><span class="pre">indent=0</span></code>
and the default <code class="docutils literal notranslate"><span class="pre">indent=None</span></code> are equivalent in pandas, though this
may change in a future release.</p>
<p><code class="docutils literal notranslate"><span class="pre">orient='table'</span></code> contains a ‘pandas_version’ field under ‘schema’.
This stores the version of <cite>pandas</cite> used in the latest revision of the
schema.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span> <span class="nn">json</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span>
<span class="gp">... </span> <span class="p">[[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">],</span> <span class="p">[</span><span class="s2">&quot;c&quot;</span><span class="p">,</span> <span class="s2">&quot;d&quot;</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;row 1&quot;</span><span class="p">,</span> <span class="s2">&quot;row 2&quot;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;col 1&quot;</span><span class="p">,</span> <span class="s2">&quot;col 2&quot;</span><span class="p">],</span>
<span class="gp">... </span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;split&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">{</span>
<span class="go"> &quot;columns&quot;: [</span>
<span class="go"> &quot;col 1&quot;,</span>
<span class="go"> &quot;col 2&quot;</span>
<span class="go"> ],</span>
<span class="go"> &quot;index&quot;: [</span>
<span class="go"> &quot;row 1&quot;,</span>
<span class="go"> &quot;row 2&quot;</span>
<span class="go"> ],</span>
<span class="go"> &quot;data&quot;: [</span>
<span class="go"> [</span>
<span class="go"> &quot;a&quot;,</span>
<span class="go"> &quot;b&quot;</span>
<span class="go"> ],</span>
<span class="go"> [</span>
<span class="go"> &quot;c&quot;,</span>
<span class="go"> &quot;d&quot;</span>
<span class="go"> ]</span>
<span class="go"> ]</span>
<span class="go">}</span>
<span class="go">Encoding/decoding a Dataframe using ``&#39;records&#39;`` formatted JSON.</span>
<span class="go">Note that index labels are not preserved with this encoding.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;records&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">[</span>
<span class="go"> {</span>
<span class="go"> &quot;col 1&quot;: &quot;a&quot;,</span>
<span class="go"> &quot;col 2&quot;: &quot;b&quot;</span>
<span class="go"> },</span>
<span class="go"> {</span>
<span class="go"> &quot;col 1&quot;: &quot;c&quot;,</span>
<span class="go"> &quot;col 2&quot;: &quot;d&quot;</span>
<span class="go"> }</span>
<span class="go">]</span>
<span class="go">Encoding/decoding a Dataframe using ``&#39;index&#39;`` formatted JSON:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;index&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">{</span>
<span class="go"> &quot;row 1&quot;: {</span>
<span class="go"> &quot;col 1&quot;: &quot;a&quot;,</span>
<span class="go"> &quot;col 2&quot;: &quot;b&quot;</span>
<span class="go"> },</span>
<span class="go"> &quot;row 2&quot;: {</span>
<span class="go"> &quot;col 1&quot;: &quot;c&quot;,</span>
<span class="go"> &quot;col 2&quot;: &quot;d&quot;</span>
<span class="go"> }</span>
<span class="go">}</span>
<span class="go">Encoding/decoding a Dataframe using ``&#39;columns&#39;`` formatted JSON:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;columns&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">{</span>
<span class="go"> &quot;col 1&quot;: {</span>
<span class="go"> &quot;row 1&quot;: &quot;a&quot;,</span>
<span class="go"> &quot;row 2&quot;: &quot;c&quot;</span>
<span class="go"> },</span>
<span class="go"> &quot;col 2&quot;: {</span>
<span class="go"> &quot;row 1&quot;: &quot;b&quot;,</span>
<span class="go"> &quot;row 2&quot;: &quot;d&quot;</span>
<span class="go"> }</span>
<span class="go">}</span>
<span class="go">Encoding/decoding a Dataframe using ``&#39;values&#39;`` formatted JSON:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;values&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">[</span>
<span class="go"> [</span>
<span class="go"> &quot;a&quot;,</span>
<span class="go"> &quot;b&quot;</span>
<span class="go"> ],</span>
<span class="go"> [</span>
<span class="go"> &quot;c&quot;,</span>
<span class="go"> &quot;d&quot;</span>
<span class="go"> ]</span>
<span class="go">]</span>
<span class="go">Encoding with Table Schema:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;table&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">{</span>
<span class="go"> &quot;schema&quot;: {</span>
<span class="go"> &quot;fields&quot;: [</span>
<span class="go"> {</span>
<span class="go"> &quot;name&quot;: &quot;index&quot;,</span>
<span class="go"> &quot;type&quot;: &quot;string&quot;</span>
<span class="go"> },</span>
<span class="go"> {</span>
<span class="go"> &quot;name&quot;: &quot;col 1&quot;,</span>
<span class="go"> &quot;type&quot;: &quot;string&quot;</span>
<span class="go"> },</span>
<span class="go"> {</span>
<span class="go"> &quot;name&quot;: &quot;col 2&quot;,</span>
<span class="go"> &quot;type&quot;: &quot;string&quot;</span>
<span class="go"> }</span>
<span class="go"> ],</span>
<span class="go"> &quot;primaryKey&quot;: [</span>
<span class="go"> &quot;index&quot;</span>
<span class="go"> ],</span>
<span class="go"> &quot;pandas_version&quot;: &quot;0.20.0&quot;</span>
<span class="go"> },</span>
<span class="go"> &quot;data&quot;: [</span>
<span class="go"> {</span>
<span class="go"> &quot;index&quot;: &quot;row 1&quot;,</span>
<span class="go"> &quot;col 1&quot;: &quot;a&quot;,</span>
<span class="go"> &quot;col 2&quot;: &quot;b&quot;</span>
<span class="go"> },</span>
<span class="go"> {</span>
<span class="go"> &quot;index&quot;: &quot;row 2&quot;,</span>
<span class="go"> &quot;col 1&quot;: &quot;c&quot;,</span>
<span class="go"> &quot;col 2&quot;: &quot;d&quot;</span>
<span class="go"> }</span>
<span class="go"> ]</span>
<span class="go">}</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_latex">
<code class="descname">to_latex</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_latex" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.to_latex.html#pandas.Series.to_latex" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.to_latex()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_latex’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_markdown">
<code class="descname">to_markdown</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_markdown" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.to_markdown.html#pandas.Series.to_markdown" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.to_markdown()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_markdown’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_msgpack">
<code class="descname">to_msgpack</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_msgpack" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_msgpack()</span></code> is not supported in the Beam DataFrame API because it is deprecated in pandas.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_parquet">
<code class="descname">to_parquet</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_parquet" title="Permalink to this definition"></a></dt>
<dd><p>Write a DataFrame to the binary parquet format.</p>
<p>This function writes the dataframe as a <a class="reference external" href="https://parquet.apache.org/">parquet file</a>. You can choose different parquet
backends, and have the option of compression. See
<a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/io.html#io-parquet" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">the user guide</span></a> for more details.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>file-like object</em><em>, </em><em>default None</em>) – <p>If a string, it will be used as Root Directory path
when writing a partitioned dataset. By file-like object,
we refer to objects with a write() method, such as a file handle
(e.g. via builtin open function) or io.BytesIO. The engine
fastparquet does not accept file-like objects. If path is None,
a bytes object is returned.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.2.0.</span></p>
</div>
<p>Previously this was “fname”.</p>
</li>
<li><strong>engine</strong> (<em>{'auto'</em><em>, </em><em>'pyarrow'</em><em>, </em><em>'fastparquet'}</em><em>, </em><em>default 'auto'</em>) – Parquet library to use. If ‘auto’, then the option
<code class="docutils literal notranslate"><span class="pre">io.parquet.engine</span></code> is used. The default <code class="docutils literal notranslate"><span class="pre">io.parquet.engine</span></code>
behavior is to try ‘pyarrow’, falling back to ‘fastparquet’ if
‘pyarrow’ is unavailable.</li>
<li><strong>compression</strong> (<em>{'snappy'</em><em>, </em><em>'gzip'</em><em>, </em><em>'brotli'</em><em>, </em><em>None}</em><em>, </em><em>default 'snappy'</em>) – Name of the compression to use. Use <code class="docutils literal notranslate"><span class="pre">None</span></code> for no compression.</li>
<li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, include the dataframe’s index(es) in the file output.
If <code class="docutils literal notranslate"><span class="pre">False</span></code>, they will not be written to the file.
If <code class="docutils literal notranslate"><span class="pre">None</span></code>, similar to <code class="docutils literal notranslate"><span class="pre">True</span></code> the dataframe’s index(es)
will be saved. However, instead of being saved as values,
the RangeIndex will be stored as a range in the metadata so it
doesn’t require much space and is faster. Other indexes will
be included as columns in the file output.</li>
<li><strong>partition_cols</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><em>optional</em><em>, </em><em>default None</em>) – Column names by which to partition the dataset.
Columns are partitioned in the order they are given.
Must be None if path is not a string.</li>
<li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib</span></code> as header options. For other URLs (e.g.
starting with “s3://” or “gcs://”) the key-value pairs are forwarded to
<code class="docutils literal notranslate"><span class="pre">fsspec</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more details.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.2.0.</span></p>
</div>
</li>
<li><strong>**kwargs</strong> – Additional arguments passed to the parquet library. See
<a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/io.html#io-parquet" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">pandas io</span></a> for more details.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bytes if no path argument is provided else None</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">read_parquet()</span></code></dt>
<dd>Read a parquet file.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_csv" title="apache_beam.dataframe.frames.DeferredDataFrame.to_csv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.to_csv()</span></code></a></dt>
<dd>Write a csv file.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_sql" title="apache_beam.dataframe.frames.DeferredDataFrame.to_sql"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.to_sql()</span></code></a></dt>
<dd>Write to a sql table.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_hdf" title="apache_beam.dataframe.frames.DeferredDataFrame.to_hdf"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.to_hdf()</span></code></a></dt>
<dd>Write to hdf.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>This function requires either the <a class="reference external" href="https://pypi.org/project/fastparquet">fastparquet</a> or <a class="reference external" href="https://arrow.apache.org/docs/python/">pyarrow</a> library.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_parquet</span><span class="p">(</span><span class="s1">&#39;df.parquet.gzip&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">compression</span><span class="o">=</span><span class="s1">&#39;gzip&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="s1">&#39;df.parquet.gzip&#39;</span><span class="p">)</span>
<span class="go"> col1 col2</span>
<span class="go">0 1 3</span>
<span class="go">1 2 4</span>
<span class="go">If you want to get a buffer to the parquet content you can use an io.BytesIO</span>
<span class="go">object, as long as you don&#39;t use partition_cols, which creates multiple files.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span> <span class="nn">io</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">f</span> <span class="o">=</span> <span class="n">io</span><span class="o">.</span><span class="n">BytesIO</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_parquet</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">f</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="go">0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">content</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_period">
<code class="descname">to_period</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_period" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.to_period.html#pandas.Series.to_period" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.to_period()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_period’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_pickle">
<code class="descname">to_pickle</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_pickle" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.to_pickle.html#pandas.Series.to_pickle" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.to_pickle()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_pickle’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_sql">
<code class="descname">to_sql</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_sql" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.to_sql.html#pandas.Series.to_sql" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.to_sql()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_sql’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_stata">
<code class="descname">to_stata</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_stata" title="Permalink to this definition"></a></dt>
<dd><p>Export DataFrame object to Stata dta format.</p>
<p>Writes the DataFrame to a Stata dataset file.
“dta” files contain a Stata dataset.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>buffer</em><em> or </em><em>path object</em>) – <p>String, path object (pathlib.Path or py._path.local.LocalPath) or
object implementing a binary write() function. If using a buffer
then the buffer will not be automatically closed after the file
data has been written.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.0.0.</span></p>
</div>
<p>Previously this was “fname”</p>
</li>
<li><strong>convert_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a>) – Dictionary mapping columns containing datetime types to stata
internal format to use when writing the dates. Options are ‘tc’,
‘td’, ‘tm’, ‘tw’, ‘th’, ‘tq’, ‘ty’. Column can be either an integer
or a name. Datetime columns that do not have a conversion type
specified will be converted to ‘tc’. Raises NotImplementedError if
a datetime column has timezone information.</li>
<li><strong>write_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a>) – Write the index to Stata dataset.</li>
<li><strong>byteorder</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – Can be “&gt;”, “&lt;”, “little”, or “big”. Default is <cite>sys.byteorder</cite>.</li>
<li><strong>time_stamp</strong> (<em>datetime</em>) – A datetime to use as file creation date. Default is the current
time.</li>
<li><strong>data_label</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – A label for the data set. Must be 80 characters or smaller.</li>
<li><strong>variable_labels</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a>) – Dictionary containing columns as keys and variable labels as
values. Each label must be 80 characters or smaller.</li>
<li><strong>version</strong> (<em>{114</em><em>, </em><em>117</em><em>, </em><em>118</em><em>, </em><em>119</em><em>, </em><em>None}</em><em>, </em><em>default 114</em>) – <p>Version to use in the output dta file. Set to None to let pandas
decide between 118 or 119 formats depending on the number of
columns in the frame. pandas Version 114 can be read by Stata 10 and
later. pandas Version 117 can be read by Stata 13 or later. pandas Version 118
is supported in Stata 14 and later. pandas Version 119 is supported in
Stata 15 and later. pandas Version 114 limits string variables to 244
characters or fewer while versions 117 and later allow strings
with lengths up to 2,000,000 characters. Versions 118 and 119
support Unicode characters, and pandas version 119 supports more than
32,767 variables.</p>
<p>pandas Version 119 should usually only be used when the number of
variables exceeds the capacity of dta format 118. Exporting
smaller datasets in format 119 may have unintended consequences,
and, as of November 2020, Stata SE cannot read pandas version 119 files.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.0.0: </span>Added support for formats 118 and 119.</p>
</div>
</li>
<li><strong>convert_strl</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><em>optional</em>) – List of column names to convert to string columns to Stata StrL
format. Only available if version is 117. Storing strings in the
StrL format can produce smaller dta files if strings have more than
8 characters and values are repeated.</li>
<li><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly compression of the output dta. If string, specifies
compression mode. If dict, value at key ‘method’ specifies
compression mode. Compression mode must be one of {‘infer’, ‘gzip’,
‘bz2’, ‘zip’, ‘xz’, None}. If compression mode is ‘infer’ and
<cite>path</cite> is path-like, then detect compression from the following
extensions: ‘.gz’, ‘.bz2’, ‘.zip’, or ‘.xz’ (otherwise no
compression). If dict and compression mode is one of {‘zip’,
‘gzip’, ‘bz2’}, or inferred as one of the above, other entries
passed as additional compression options.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
<li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib</span></code> as header options. For other URLs (e.g.
starting with “s3://”, and “gcs://”) the key-value pairs are forwarded to
<code class="docutils literal notranslate"><span class="pre">fsspec</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more details.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.2.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><ul class="first last">
<li><p class="first"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#NotImplementedError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">NotImplementedError</span></code></a> –</p>
<ul class="last simple">
<li>If datetimes contain timezone information</li>
<li>Column dtype is not representable in Stata</li>
</ul>
</li>
<li><p class="first"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> –</p>
<ul class="last simple">
<li>Columns listed in convert_dates are neither datetime64[ns] nor datetime.datetime</li>
<li>Column listed in convert_dates is not in DeferredDataFrame</li>
<li>Categorical label contains more than 32,000 characters</li>
</ul>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">read_stata()</span></code></dt>
<dd>Import Stata data files.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">io.stata.StataWriter()</span></code></dt>
<dd>Low-level writer for Stata data files.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">io.stata.StataWriter117()</span></code></dt>
<dd>Low-level writer for pandas version 117 files.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;animal&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;parrot&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;parrot&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;speed&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">350</span><span class="p">,</span> <span class="mi">18</span><span class="p">,</span> <span class="mi">361</span><span class="p">,</span> <span class="mi">15</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_stata</span><span class="p">(</span><span class="s1">&#39;animals.dta&#39;</span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_timestamp">
<code class="descname">to_timestamp</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_timestamp" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.to_timestamp.html#pandas.Series.to_timestamp" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.to_timestamp()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_timestamp’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.to_xarray">
<code class="descname">to_xarray</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.to_xarray" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_xarray.html#pandas.DataFrame.to_xarray" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_xarray()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.transform">
<code class="descname">transform</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.transform" title="Permalink to this definition"></a></dt>
<dd><p>Call <code class="docutils literal notranslate"><span class="pre">func</span></code> on self producing a DataFrame with transformed values.</p>
<p>Produced DataFrame will have same axis length as self.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>func</strong> (<em>function</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>list-like</em><em> or </em><em>dict-like</em>) – <p>Function to use for transforming the data. If a function, must either
work when passed a DeferredDataFrame or when passed to DeferredDataFrame.apply. If func
is both list-like and dict-like, dict-like behavior takes precedence.</p>
<p>Accepted combinations are:</p>
<ul>
<li>function</li>
<li>string function name</li>
<li>list-like of functions and/or function names, e.g. <code class="docutils literal notranslate"><span class="pre">[np.exp,</span> <span class="pre">'sqrt']</span></code></li>
<li>dict-like of axis labels -&gt; functions, function names or list-like of such.</li>
</ul>
</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – If 0 or ‘index’: apply function to each column.
If 1 or ‘columns’: apply function to each row.</li>
<li><strong>*args</strong> – Positional arguments to pass to <cite>func</cite>.</li>
<li><strong>**kwargs</strong> – Keyword arguments to pass to <cite>func</cite>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A DeferredDataFrame that must have the same length as self.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last">ValueError : If the returned DeferredDataFrame has a different length than self.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.agg" title="apache_beam.dataframe.frames.DeferredDataFrame.agg"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.agg()</span></code></a></dt>
<dd>Only perform aggregating type operations.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.apply" title="apache_beam.dataframe.frames.DeferredDataFrame.apply"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.apply()</span></code></a></dt>
<dd>Invoke function on a DeferredDataFrame.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Functions that mutate the passed object can produce unexpected
behavior or errors and are not supported. See <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/gotchas.html#gotchas-udf-mutation" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span>Mutating with User Defined Function (UDF) methods</span></a>
for more details.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">3</span><span class="p">),</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4</span><span class="p">)})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">0 0 1</span>
<span class="go">1 1 2</span>
<span class="go">2 2 3</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 1 2</span>
<span class="go">1 2 3</span>
<span class="go">2 3 4</span>
<span class="go">Even though the resulting DataFrame must have the same length as the</span>
<span class="go">input DataFrame, it is possible to provide several input functions:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">3</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 0</span>
<span class="go">1 1</span>
<span class="go">2 2</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">transform</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">])</span>
<span class="go"> sqrt exp</span>
<span class="go">0 0.000000 1.000000</span>
<span class="go">1 1.000000 2.718282</span>
<span class="go">2 1.414214 7.389056</span>
<span class="go">You can call transform on a GroupBy object:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span>
<span class="gp">... </span> <span class="s2">&quot;Date&quot;</span><span class="p">:</span> <span class="p">[</span>
<span class="gp">... </span> <span class="s2">&quot;2015-05-08&quot;</span><span class="p">,</span> <span class="s2">&quot;2015-05-07&quot;</span><span class="p">,</span> <span class="s2">&quot;2015-05-06&quot;</span><span class="p">,</span> <span class="s2">&quot;2015-05-05&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;2015-05-08&quot;</span><span class="p">,</span> <span class="s2">&quot;2015-05-07&quot;</span><span class="p">,</span> <span class="s2">&quot;2015-05-06&quot;</span><span class="p">,</span> <span class="s2">&quot;2015-05-05&quot;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;Data&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">50</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">60</span><span class="p">,</span> <span class="mi">120</span><span class="p">],</span>
<span class="gp">... </span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> Date Data</span>
<span class="go">0 2015-05-08 5</span>
<span class="go">1 2015-05-07 8</span>
<span class="go">2 2015-05-06 6</span>
<span class="go">3 2015-05-05 1</span>
<span class="go">4 2015-05-08 50</span>
<span class="go">5 2015-05-07 100</span>
<span class="go">6 2015-05-06 60</span>
<span class="go">7 2015-05-05 120</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">&#39;Date&#39;</span><span class="p">)[</span><span class="s1">&#39;Data&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="s1">&#39;sum&#39;</span><span class="p">)</span>
<span class="go">0 55</span>
<span class="go">1 108</span>
<span class="go">2 66</span>
<span class="go">3 121</span>
<span class="go">4 55</span>
<span class="go">5 108</span>
<span class="go">6 66</span>
<span class="go">7 121</span>
<span class="go">Name: Data, dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span>
<span class="gp">... </span> <span class="s2">&quot;c&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;m&quot;</span><span class="p">,</span> <span class="s2">&quot;n&quot;</span><span class="p">,</span> <span class="s2">&quot;o&quot;</span><span class="p">,</span> <span class="s2">&quot;m&quot;</span><span class="p">,</span> <span class="s2">&quot;m&quot;</span><span class="p">,</span> <span class="s2">&quot;n&quot;</span><span class="p">,</span> <span class="s2">&quot;n&quot;</span><span class="p">]</span>
<span class="gp">... </span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> c type</span>
<span class="go">0 1 m</span>
<span class="go">1 1 n</span>
<span class="go">2 1 o</span>
<span class="go">3 2 m</span>
<span class="go">4 2 m</span>
<span class="go">5 2 n</span>
<span class="go">6 2 n</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;size&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">&#39;c&#39;</span><span class="p">)[</span><span class="s1">&#39;type&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="nb">len</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> c type size</span>
<span class="go">0 1 m 3</span>
<span class="go">1 1 n 3</span>
<span class="go">2 1 o 3</span>
<span class="go">3 2 m 4</span>
<span class="go">4 2 m 4</span>
<span class="go">5 2 n 4</span>
<span class="go">6 2 n 4</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.truediv">
<code class="descname">truediv</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.truediv" title="Permalink to this definition"></a></dt>
<dd><p>Return floating division of series and other, element-wise (binary operator <cite>truediv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">series</span> <span class="pre">/</span> <span class="pre">other</span></code>, but with support for substituting a fill_value for
missing data in either one of the inputs.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><em>scalar value</em>) – </li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> or </em><em>float value</em><em>, </em><em>default None</em><em> (</em><em>NaN</em><em>)</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredSeries alignment, with this value before computation.
If data in both corresponding DeferredSeries locations is missing
the result of filling (at that location) will be missing.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>name</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rtruediv" title="apache_beam.dataframe.frames.DeferredSeries.rtruediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rtruediv()</span></code></a></dt>
<dd>Reverse of the floating division operator, see <a class="reference external" href="https://docs.python.org/3/reference/datamodel.html#emulating-numeric-types">Python documentation</a> for more details.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">a</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span>
<span class="go">a 1.0</span>
<span class="go">b 1.0</span>
<span class="go">c 1.0</span>
<span class="go">d NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">b</span>
<span class="go">a 1.0</span>
<span class="go">b NaN</span>
<span class="go">d 1.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">a</span><span class="o">.</span><span class="n">divide</span><span class="p">(</span><span class="n">b</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go">a 1.0</span>
<span class="go">b inf</span>
<span class="go">c inf</span>
<span class="go">d 0.0</span>
<span class="go">e NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.truncate">
<code class="descname">truncate</code><span class="sig-paren">(</span><em>before</em>, <em>after</em>, <em>axis</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.truncate" title="Permalink to this definition"></a></dt>
<dd><p>Truncate a Series or DataFrame before and after some index value.</p>
<p>This is a useful shorthand for boolean indexing based on index
values above or below certain thresholds.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>before</strong> (<em>date</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – Truncate all rows before this index value.</li>
<li><strong>after</strong> (<em>date</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – Truncate all rows after this index value.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>optional</em>) – Axis to truncate. Truncates the index (rows) by default.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default is True</em>) – Return a copy of the truncated section.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The truncated DeferredSeries or DeferredDataFrame.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">type of caller</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.loc" title="apache_beam.dataframe.frames.DeferredDataFrame.loc"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.loc()</span></code></a></dt>
<dd>Select a subset of a DeferredDataFrame by label.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.iloc" title="apache_beam.dataframe.frames.DeferredDataFrame.iloc"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.iloc()</span></code></a></dt>
<dd>Select a subset of a DeferredDataFrame by position.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>If the index being truncated contains only datetime values,
<cite>before</cite> and <cite>after</cite> may be specified as strings instead of
Timestamps.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;f&#39;</span><span class="p">,</span> <span class="s1">&#39;g&#39;</span><span class="p">,</span> <span class="s1">&#39;h&#39;</span><span class="p">,</span> <span class="s1">&#39;i&#39;</span><span class="p">,</span> <span class="s1">&#39;j&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;k&#39;</span><span class="p">,</span> <span class="s1">&#39;l&#39;</span><span class="p">,</span> <span class="s1">&#39;m&#39;</span><span class="p">,</span> <span class="s1">&#39;n&#39;</span><span class="p">,</span> <span class="s1">&#39;o&#39;</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B C</span>
<span class="go">1 a f k</span>
<span class="go">2 b g l</span>
<span class="go">3 c h m</span>
<span class="go">4 d i n</span>
<span class="go">5 e j o</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">truncate</span><span class="p">(</span><span class="n">before</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">after</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">2 b g l</span>
<span class="go">3 c h m</span>
<span class="go">4 d i n</span>
<span class="go">The columns of a DataFrame can be truncated.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">truncate</span><span class="p">(</span><span class="n">before</span><span class="o">=</span><span class="s2">&quot;A&quot;</span><span class="p">,</span> <span class="n">after</span><span class="o">=</span><span class="s2">&quot;B&quot;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="s2">&quot;columns&quot;</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">1 a f</span>
<span class="go">2 b g</span>
<span class="go">3 c h</span>
<span class="go">4 d i</span>
<span class="go">5 e j</span>
<span class="go">For Series, only rows can be truncated.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">truncate</span><span class="p">(</span><span class="n">before</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">after</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">2 b</span>
<span class="go">3 c</span>
<span class="go">4 d</span>
<span class="go">Name: A, dtype: object</span>
<span class="go">The index values in ``truncate`` can be datetimes or string</span>
<span class="go">dates.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">dates</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s1">&#39;2016-01-01&#39;</span><span class="p">,</span> <span class="s1">&#39;2016-02-01&#39;</span><span class="p">,</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;s&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="n">dates</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">tail</span><span class="p">()</span>
<span class="go"> A</span>
<span class="go">2016-01-31 23:59:56 1</span>
<span class="go">2016-01-31 23:59:57 1</span>
<span class="go">2016-01-31 23:59:58 1</span>
<span class="go">2016-01-31 23:59:59 1</span>
<span class="go">2016-02-01 00:00:00 1</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">truncate</span><span class="p">(</span><span class="n">before</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;2016-01-05&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">after</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;2016-01-10&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">tail</span><span class="p">()</span>
<span class="go"> A</span>
<span class="go">2016-01-09 23:59:56 1</span>
<span class="go">2016-01-09 23:59:57 1</span>
<span class="go">2016-01-09 23:59:58 1</span>
<span class="go">2016-01-09 23:59:59 1</span>
<span class="go">2016-01-10 00:00:00 1</span>
<span class="go">Because the index is a DatetimeIndex containing only dates, we can</span>
<span class="go">specify `before` and `after` as strings. They will be coerced to</span>
<span class="go">Timestamps before truncation.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">truncate</span><span class="p">(</span><span class="s1">&#39;2016-01-05&#39;</span><span class="p">,</span> <span class="s1">&#39;2016-01-10&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">tail</span><span class="p">()</span>
<span class="go"> A</span>
<span class="go">2016-01-09 23:59:56 1</span>
<span class="go">2016-01-09 23:59:57 1</span>
<span class="go">2016-01-09 23:59:58 1</span>
<span class="go">2016-01-09 23:59:59 1</span>
<span class="go">2016-01-10 00:00:00 1</span>
<span class="go">Note that ``truncate`` assumes a 0 value for any unspecified time</span>
<span class="go">component (midnight). This differs from partial string slicing, which</span>
<span class="go">returns any partially matching dates.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;2016-01-05&#39;</span><span class="p">:</span><span class="s1">&#39;2016-01-10&#39;</span><span class="p">,</span> <span class="p">:]</span><span class="o">.</span><span class="n">tail</span><span class="p">()</span>
<span class="go"> A</span>
<span class="go">2016-01-10 23:59:55 1</span>
<span class="go">2016-01-10 23:59:56 1</span>
<span class="go">2016-01-10 23:59:57 1</span>
<span class="go">2016-01-10 23:59:58 1</span>
<span class="go">2016-01-10 23:59:59 1</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.tz_convert">
<code class="descname">tz_convert</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.tz_convert" title="Permalink to this definition"></a></dt>
<dd><p>Convert tz-aware axis to target time zone.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>tz</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>tzinfo object</em>) – </li>
<li><strong>axis</strong> (<em>the axis to convert</em>) – </li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default None</em>) – If axis is a MultiIndex, convert a specific level. Otherwise
must be None.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Also make a copy of the underlying data.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Object with time zone converted axis.</p>
</td>
</tr>
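<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>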
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> – If the axis is tz-naive.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.tz_localize">
<code class="descname">tz_localize</code><span class="sig-paren">(</span><em>ambiguous</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.tz_localize" title="Permalink to this definition"></a></dt>
<dd><p>Localize tz-naive index of a Series or DataFrame to target time zone.</p>
<p>This operation localizes the Index. To localize the values in a
timezone-naive Series, use <code class="xref py py-meth docutils literal notranslate"><span class="pre">Series.dt.tz_localize()</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>tz</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>tzinfo</em>) – </li>
<li><strong>axis</strong> (<em>the axis to localize</em>) – </li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default None</em>) – If axis is a MultiIndex, localize a specific level. Otherwise
must be None.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Also make a copy of the underlying data.</li>
<li><strong>ambiguous</strong> (<em>'infer'</em><em>, </em><em>bool-ndarray</em><em>, </em><em>'NaT'</em><em>, </em><em>default 'raise'</em>) – <p>When clocks moved backward due to DST, ambiguous times may arise.
For example in Central European Time (UTC+01), when going from
03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
<cite>ambiguous</cite> parameter dictates how ambiguous times should be
handled.</p>
<ul>
<li>‘infer’ will attempt to infer fall DST-transition hours based on
order</li>
<li>bool-ndarray where True signifies a DST time, False designates
a non-DST time (note that this flag is only applicable for
ambiguous times)</li>
<li>‘NaT’ will return NaT where there are ambiguous times</li>
<li>‘raise’ will raise an AmbiguousTimeError if there are ambiguous
times.</li>
</ul>
</li>
<li><strong>nonexistent</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default 'raise'</em>) – <p>A nonexistent time does not exist in a particular timezone
where clocks moved forward due to DST. Valid values are:</p>
<ul>
<li>‘shift_forward’ will shift the nonexistent time forward to the
closest existing time</li>
<li>‘shift_backward’ will shift the nonexistent time backward to the
closest existing time</li>
<li>‘NaT’ will return NaT where there are nonexistent times</li>
<li>timedelta objects will shift nonexistent times by the timedelta</li>
<li>‘raise’ will raise a NonExistentTimeError if there are
nonexistent times.</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Same type as the input.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> – If the time series is tz-aware and tz is not None.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">ambiguous</span></code> cannot be set to <code class="docutils literal notranslate"><span class="pre">&quot;infer&quot;</span></code> as its semantics are
order-sensitive. Similarly, specifying <code class="docutils literal notranslate"><span class="pre">ambiguous</span></code> as an
<a class="reference external" href="https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html#numpy.ndarray" title="(in NumPy v1.22)"><code class="xref py py-class docutils literal notranslate"><span class="pre">ndarray</span></code></a> is order-sensitive, but you can achieve similar
functionality by specifying <code class="docutils literal notranslate"><span class="pre">ambiguous</span></code> as a Series.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Localize local times:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">DatetimeIndex</span><span class="p">([</span><span class="s1">&#39;2018-09-15 01:30:00&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">tz_localize</span><span class="p">(</span><span class="s1">&#39;CET&#39;</span><span class="p">)</span>
<span class="go">2018-09-15 01:30:00+02:00 1</span>
<span class="go">dtype: int64</span>
<span class="go">Be careful with DST changes. When there is sequential data, pandas</span>
<span class="go">can infer the DST time:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">7</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">DatetimeIndex</span><span class="p">([</span><span class="s1">&#39;2018-10-28 01:30:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 02:00:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 02:30:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 02:00:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 02:30:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 03:00:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 03:30:00&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">tz_localize</span><span class="p">(</span><span class="s1">&#39;CET&#39;</span><span class="p">,</span> <span class="n">ambiguous</span><span class="o">=</span><span class="s1">&#39;infer&#39;</span><span class="p">)</span>
<span class="go">2018-10-28 01:30:00+02:00 0</span>
<span class="go">2018-10-28 02:00:00+02:00 1</span>
<span class="go">2018-10-28 02:30:00+02:00 2</span>
<span class="go">2018-10-28 02:00:00+01:00 3</span>
<span class="go">2018-10-28 02:30:00+01:00 4</span>
<span class="go">2018-10-28 03:00:00+01:00 5</span>
<span class="go">2018-10-28 03:30:00+01:00 6</span>
<span class="go">dtype: int64</span>
<span class="go">In some cases, inferring the DST is impossible. In such cases, you can</span>
<span class="go">pass an ndarray to the ambiguous parameter to set the DST explicitly</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">3</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">DatetimeIndex</span><span class="p">([</span><span class="s1">&#39;2018-10-28 01:20:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 02:36:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 03:46:00&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">tz_localize</span><span class="p">(</span><span class="s1">&#39;CET&#39;</span><span class="p">,</span> <span class="n">ambiguous</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="kc">True</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">]))</span>
<span class="go">2018-10-28 01:20:00+02:00 0</span>
<span class="go">2018-10-28 02:36:00+02:00 1</span>
<span class="go">2018-10-28 03:46:00+01:00 2</span>
<span class="go">dtype: int64</span>
<span class="go">If the DST transition causes nonexistent times, you can shift these</span>
<span class="go">dates forward or backward with a timedelta object or `&#39;shift_forward&#39;`</span>
<span class="go">or `&#39;shift_backward&#39;`.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">DatetimeIndex</span><span class="p">([</span><span class="s1">&#39;2015-03-29 02:30:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2015-03-29 03:30:00&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">tz_localize</span><span class="p">(</span><span class="s1">&#39;Europe/Warsaw&#39;</span><span class="p">,</span> <span class="n">nonexistent</span><span class="o">=</span><span class="s1">&#39;shift_forward&#39;</span><span class="p">)</span>
<span class="go">2015-03-29 03:00:00+02:00 0</span>
<span class="go">2015-03-29 03:30:00+02:00 1</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">tz_localize</span><span class="p">(</span><span class="s1">&#39;Europe/Warsaw&#39;</span><span class="p">,</span> <span class="n">nonexistent</span><span class="o">=</span><span class="s1">&#39;shift_backward&#39;</span><span class="p">)</span>
<span class="go">2015-03-29 01:59:59.999999999+01:00 0</span>
<span class="go">2015-03-29 03:30:00+02:00 1</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">tz_localize</span><span class="p">(</span><span class="s1">&#39;Europe/Warsaw&#39;</span><span class="p">,</span> <span class="n">nonexistent</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">Timedelta</span><span class="p">(</span><span class="s1">&#39;1H&#39;</span><span class="p">))</span>
<span class="go">2015-03-29 03:30:00+02:00 0</span>
<span class="go">2015-03-29 03:30:00+02:00 1</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.where">
<code class="descname">where</code><span class="sig-paren">(</span><em>cond</em>, <em>other</em>, <em>errors</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.where" title="Permalink to this definition"></a></dt>
<dd><p><code class="docutils literal notranslate"><span class="pre">where</span></code> is not parallelizable when <code class="docutils literal notranslate"><span class="pre">errors=&quot;ignore&quot;</span></code> is specified.</p>
</dd></dl>
<dl class="classmethod">
<dt id="apache_beam.dataframe.frames.DeferredSeries.wrap">
<em class="property">classmethod </em><code class="descname">wrap</code><span class="sig-paren">(</span><em>expr</em>, <em>split_tuples=True</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.wrap" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredSeries.xs">
<code class="descname">xs</code><span class="sig-paren">(</span><em>key</em>, <em>axis</em>, <em>level</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredSeries.xs" title="Permalink to this definition"></a></dt>
<dd><p>Return cross-section from the Series/DataFrame.</p>
<p>This method takes a <cite>key</cite> argument to select data at a particular
level of a MultiIndex.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>key</strong> (<em>label</em><em> or </em><em>tuple of label</em>) – Label contained in the index, or partially in a MultiIndex.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – Axis to retrieve cross-section on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><em>object</em></a><em>, </em><em>defaults to first n levels</em><em> (</em><em>n=1</em><em> or </em><em>len</em><em>(</em><em>key</em><em>)</em><em>)</em>) – In case of a key partially contained in a MultiIndex, indicate
which levels are used. Levels can be referred by label or position.</li>
<li><strong>drop_level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – If False, returns object with same levels as self.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Cross-section from the original DeferredSeries or DeferredDataFrame
corresponding to the selected index levels.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Note that <code class="docutils literal notranslate"><span class="pre">xs(axis='index')</span></code> will raise a <code class="docutils literal notranslate"><span class="pre">KeyError</span></code> at execution
time if the key does not exist in the index.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.loc" title="apache_beam.dataframe.frames.DeferredDataFrame.loc"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.loc()</span></code></a></dt>
<dd>Access a group of rows and columns by label(s) or a boolean array.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.iloc" title="apache_beam.dataframe.frames.DeferredDataFrame.iloc"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.iloc()</span></code></a></dt>
<dd>Purely integer-location based indexing for selection by position.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p><cite>xs</cite> cannot be used to set values.</p>
<p>MultiIndex Slicers is a generic way to get/set values on
any level or levels.
It is a superset of <cite>xs</cite> functionality, see
<a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/advanced.html#advanced-mi-slicers" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">MultiIndex Slicers</span></a>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">d</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;num_legs&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;num_wings&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;class&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="s1">&#39;bird&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;animal&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;cat&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="s1">&#39;bat&#39;</span><span class="p">,</span> <span class="s1">&#39;penguin&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;locomotion&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;walks&#39;</span><span class="p">,</span> <span class="s1">&#39;walks&#39;</span><span class="p">,</span> <span class="s1">&#39;flies&#39;</span><span class="p">,</span> <span class="s1">&#39;walks&#39;</span><span class="p">]}</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="n">d</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">set_index</span><span class="p">([</span><span class="s1">&#39;class&#39;</span><span class="p">,</span> <span class="s1">&#39;animal&#39;</span><span class="p">,</span> <span class="s1">&#39;locomotion&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> num_legs num_wings</span>
<span class="go">class animal locomotion</span>
<span class="go">mammal cat walks 4 0</span>
<span class="go"> dog walks 4 0</span>
<span class="go"> bat flies 2 2</span>
<span class="go">bird penguin walks 2 2</span>
</pre></div>
</div>
<p>Get values at specified index</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;mammal&#39;</span><span class="p">)</span>
<span class="go"> num_legs num_wings</span>
<span class="go">animal locomotion</span>
<span class="go">cat walks 4 0</span>
<span class="go">dog walks 4 0</span>
<span class="go">bat flies 2 2</span>
</pre></div>
</div>
<p>Get values at several indexes</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">xs</span><span class="p">((</span><span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">))</span>
<span class="go"> num_legs num_wings</span>
<span class="go">locomotion</span>
<span class="go">walks 4 0</span>
</pre></div>
</div>
<p>Get values at specified index and level</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;cat&#39;</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> num_legs num_wings</span>
<span class="go">class locomotion</span>
<span class="go">mammal walks 4 0</span>
</pre></div>
</div>
<p>Get values at several indexes and levels</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">xs</span><span class="p">((</span><span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="s1">&#39;walks&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">level</span><span class="o">=</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="s1">&#39;locomotion&#39;</span><span class="p">])</span>
<span class="go"> num_legs num_wings</span>
<span class="go">animal</span>
<span class="go">penguin 2 2</span>
</pre></div>
</div>
<p>Get values at specified column and axis</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;num_wings&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">class animal locomotion</span>
<span class="go">mammal cat walks 0</span>
<span class="go"> dog walks 0</span>
<span class="go"> bat flies 2</span>
<span class="go">bird penguin walks 2</span>
<span class="go">Name: num_wings, dtype: int64</span>
</pre></div>
</div>
</dd></dl>
</dd></dl>
<dl class="class">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame">
<em class="property">class </em><code class="descclassname">apache_beam.dataframe.frames.</code><code class="descname">DeferredDataFrame</code><span class="sig-paren">(</span><em>expr</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="Permalink to this definition"></a></dt>
<dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">apache_beam.dataframe.frames.DeferredDataFrameOrSeries</span></code></p>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.columns">
<code class="descname">columns</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.columns" title="Permalink to this definition"></a></dt>
<dd><p>The column labels of the DataFrame.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.keys">
<code class="descname">keys</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.keys"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.keys" title="Permalink to this definition"></a></dt>
<dd><p>Get the ‘info axis’ (see Indexing for more).</p>
<p>This is the index for a Series and the columns for a DataFrame.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Info axis.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://pandas.pydata.org/docs/reference/api/pandas.Index.html" title="(in pandas)">pandas.Index</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.align">
<code class="descname">align</code><span class="sig-paren">(</span><em>other</em>, <em>join</em>, <em>axis</em>, <em>copy</em>, <em>level</em>, <em>method</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.align"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.align" title="Permalink to this definition"></a></dt>
<dd><p>Align two objects on their axes with the specified join method.</p>
<p>Join method is specified for each axis Index.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a><em> or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a>) – </li>
<li><strong>join</strong> (<em>{'outer'</em><em>, </em><em>'inner'</em><em>, </em><em>'left'</em><em>, </em><em>'right'}</em><em>, </em><em>default 'outer'</em>) – </li>
<li><strong>axis</strong> (<em>allowed axis of the other object</em><em>, </em><em>default None</em>) – Align on index (0), columns (1), or both (None).</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Always returns new objects. If copy=False and no reindexing is
required then original objects are returned.</li>
<li><strong>fill_value</strong> (<em>scalar</em><em>, </em><em>default np.NaN</em>) – Value to use for missing values. Defaults to NaN, but can be any
“compatible” value.</li>
<li><strong>method</strong> (<em>{'backfill'</em><em>, </em><em>'bfill'</em><em>, </em><em>'pad'</em><em>, </em><em>'ffill'</em><em>, </em><em>None}</em><em>, </em><em>default None</em>) – <p>Method to use for filling holes in reindexed DeferredSeries:</p>
<ul>
<li>pad / ffill: propagate last valid observation forward to next valid.</li>
<li>backfill / bfill: use NEXT valid observation to fill gap.</li>
</ul>
</li>
<li><strong>limit</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default None</em>) – If method is specified, this is the maximum number of consecutive
NaN values to forward/backward fill. In other words, if there is
a gap with more than this number of consecutive NaNs, it will only
be partially filled. If method is not specified, this is the
maximum number of entries along the entire axis where NaNs will be
filled. Must be greater than 0 if not None.</li>
<li><strong>fill_axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – Filling axis, method and limit.</li>
<li><strong>broadcast_axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default None</em>) – Broadcast values along this axis, if aligning two objects of
different dimensions.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><strong>(left, right)</strong> – Aligned objects.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">(<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a>, type of other)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Aligning per level is not yet supported. Only the default,
<code class="docutils literal notranslate"><span class="pre">level=None</span></code>, is allowed.</p>
<p>Filling NaN values via <code class="docutils literal notranslate"><span class="pre">method</span></code> is not supported, because it is
<a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">order-sensitive</a>. Only the
default, <code class="docutils literal notranslate"><span class="pre">method=None</span></code>, is allowed.</p>
<p><code class="docutils literal notranslate"><span class="pre">copy=False</span></code> is not supported because its behavior (whether or not it is
an inplace operation) depends on the data.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.append">
<code class="descname">append</code><span class="sig-paren">(</span><em>other</em>, <em>ignore_index</em>, <em>verify_integrity</em>, <em>sort</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.append"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.append" title="Permalink to this definition"></a></dt>
<dd><p>Append rows of <cite>other</cite> to the end of caller, returning a new object.</p>
<p>Columns in <cite>other</cite> that are not in the caller are added as new columns.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a><em> or </em><em>DeferredSeries/dict-like object</em><em>, or </em><em>list of these</em>) – The data to append.</li>
<li><strong>ignore_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, the resulting axis will be labeled 0, 1, …, n - 1.</li>
<li><strong>verify_integrity</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, raise ValueError on creating index with duplicates.</li>
<li><strong>sort</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>Sort columns if the columns of <cite>self</cite> and <cite>other</cite> are not aligned.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.0.0: </span>Changed to not sort by default.</p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A new DeferredDataFrame consisting of the rows of caller and the rows of <cite>other</cite>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">ignore_index=True</span></code> is not supported, because it requires generating an
order-sensitive index.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">concat()</span></code></dt>
<dd>General function to concatenate DeferredDataFrame or DeferredSeries objects.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>If a list of dict/series is passed and the keys are all contained in
the DeferredDataFrame’s index, the order of the columns in the resulting
DeferredDataFrame will be unchanged.</p>
<p>Iteratively appending rows to a DeferredDataFrame can be more computationally
intensive than a single concatenate. A better solution is to append
those rows to a list and then concatenate the list with the original
DeferredDataFrame all at once.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]],</span> <span class="n">columns</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="s1">&#39;AB&#39;</span><span class="p">),</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;x&#39;</span><span class="p">,</span> <span class="s1">&#39;y&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">x 1 2</span>
<span class="go">y 3 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span> <span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">]],</span> <span class="n">columns</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="s1">&#39;AB&#39;</span><span class="p">),</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;x&#39;</span><span class="p">,</span> <span class="s1">&#39;y&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">df2</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">x 1 2</span>
<span class="go">y 3 4</span>
<span class="go">x 5 6</span>
<span class="go">y 7 8</span>
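</pre></div>
</div>
<p>With <code class="docutils literal notranslate"><span class="pre">ignore_index</span></code> set to True:</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre>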
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">ignore_index</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 1 2</span>
<span class="go">1 3 4</span>
<span class="go">2 5 6</span>
<span class="go">3 7 8</span>
</pre></div>
</div>
<p>The following, while not recommended methods for generating DataFrames,
show two ways to generate a DataFrame from multiple data sources.</p>
<p>Less efficient:</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">):</span>
<span class="gp">... </span> <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">append</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="n">i</span><span class="p">},</span> <span class="n">ignore_index</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A</span>
<span class="go">0 0</span>
<span class="go">1 1</span>
<span class="go">2 2</span>
<span class="go">3 3</span>
<span class="go">4 4</span>
</pre></div>
</div>
<p>More efficient:</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">concat</span><span class="p">([</span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([</span><span class="n">i</span><span class="p">],</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">])</span> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">5</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">ignore_index</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> A</span>
<span class="go">0 0</span>
<span class="go">1 1</span>
<span class="go">2 2</span>
<span class="go">3 3</span>
<span class="go">4 4</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.get">
<code class="descname">get</code><span class="sig-paren">(</span><em>key</em>, <em>default_value=None</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.get"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.get" title="Permalink to this definition"></a></dt>
<dd><p>Get item from object for given key (ex: DataFrame column).</p>
<p>Returns default value if not found.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>key</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><em>object</em></a>) – </td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><strong>value</strong></td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body">same type as items contained in object</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.set_index">
<code class="descname">set_index</code><span class="sig-paren">(</span><em>keys</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.set_index"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.set_index" title="Permalink to this definition"></a></dt>
<dd><p>Set the DataFrame index using existing columns.</p>
<p>Set the DataFrame index (row labels) using one or more existing
columns or arrays (of the correct length). The index can replace the
existing index or expand on it.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>keys</strong> (<em>label</em><em> or </em><em>array-like</em><em> or </em><em>list of labels/arrays</em>) – This parameter can be either a single column key, a single array of
the same length as the calling DeferredDataFrame, or a list containing an
arbitrary combination of column keys and arrays. Here, “array”
encompasses <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredSeries</span></code></a>, <code class="xref py py-class docutils literal notranslate"><span class="pre">Index</span></code>, <code class="docutils literal notranslate"><span class="pre">np.ndarray</span></code>, and
instances of <a class="reference external" href="https://docs.python.org/3/library/collections.abc.html#collections.abc.Iterator" title="(in Python v3.10)"><code class="xref py py-class docutils literal notranslate"><span class="pre">Iterator</span></code></a>.</li>
<li><strong>drop</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Delete columns to be used as the new index.</li>
<li><strong>append</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Whether to append columns to existing index.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, modifies the DeferredDataFrame in place (do not create a new object).</li>
<li><strong>verify_integrity</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Check the new index for duplicates. Otherwise defer the check until
necessary. Setting to False will improve the performance of this
method.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Changed row labels or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">keys</span></code> must be a <code class="docutils literal notranslate"><span class="pre">str</span></code> or <code class="docutils literal notranslate"><span class="pre">List[str]</span></code>. Passing an Index or Series
is not yet supported (<a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-11711">BEAM-11711</a>).</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.reset_index" title="apache_beam.dataframe.frames.DeferredDataFrame.reset_index"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.reset_index()</span></code></a></dt>
<dd>Opposite of set_index.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.reindex" title="apache_beam.dataframe.frames.DeferredDataFrame.reindex"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.reindex()</span></code></a></dt>
<dd>Change to new indices or expand indices.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.reindex_like" title="apache_beam.dataframe.frames.DeferredDataFrame.reindex_like"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.reindex_like()</span></code></a></dt>
<dd>Change to same indices as other DeferredDataFrame.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;month&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">10</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;year&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">2012</span><span class="p">,</span> <span class="mi">2014</span><span class="p">,</span> <span class="mi">2013</span><span class="p">,</span> <span class="mi">2014</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;sale&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">55</span><span class="p">,</span> <span class="mi">40</span><span class="p">,</span> <span class="mi">84</span><span class="p">,</span> <span class="mi">31</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> month year sale</span>
<span class="go">0 1 2012 55</span>
<span class="go">1 4 2014 40</span>
<span class="go">2 7 2013 84</span>
<span class="go">3 10 2014 31</span>
<span class="go">Set the index to become the &#39;month&#39; column:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">set_index</span><span class="p">(</span><span class="s1">&#39;month&#39;</span><span class="p">)</span>
<span class="go"> year sale</span>
<span class="go">month</span>
<span class="go">1 2012 55</span>
<span class="go">4 2014 40</span>
<span class="go">7 2013 84</span>
<span class="go">10 2014 31</span>
<span class="go">Create a MultiIndex using columns &#39;year&#39; and &#39;month&#39;:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">set_index</span><span class="p">([</span><span class="s1">&#39;year&#39;</span><span class="p">,</span> <span class="s1">&#39;month&#39;</span><span class="p">])</span>
<span class="go"> sale</span>
<span class="go">year month</span>
<span class="go">2012 1 55</span>
<span class="go">2014 4 40</span>
<span class="go">2013 7 84</span>
<span class="go">2014 10 31</span>
<span class="go">Create a MultiIndex using an Index and a column:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">set_index</span><span class="p">([</span><span class="n">pd</span><span class="o">.</span><span class="n">Index</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]),</span> <span class="s1">&#39;year&#39;</span><span class="p">])</span>
<span class="go"> month sale</span>
<span class="go"> year</span>
<span class="go">1 2012 1 55</span>
<span class="go">2 2014 4 40</span>
<span class="go">3 2013 7 84</span>
<span class="go">4 2014 10 31</span>
<span class="go">Create a MultiIndex using two Series:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">set_index</span><span class="p">([</span><span class="n">s</span><span class="p">,</span> <span class="n">s</span><span class="o">**</span><span class="mi">2</span><span class="p">])</span>
<span class="go"> month year sale</span>
<span class="go">1 1 1 2012 55</span>
<span class="go">2 4 4 2014 40</span>
<span class="go">3 9 7 2013 84</span>
<span class="go">4 16 10 2014 31</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.set_axis">
<code class="descname">set_axis</code><span class="sig-paren">(</span><em>labels</em>, <em>axis</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.set_axis"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.set_axis" title="Permalink to this definition"></a></dt>
<dd><p>Assign desired index to given axis.</p>
<p>Indexes for column or row labels can be changed by assigning
a list-like or Index.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>labels</strong> (<em>list-like</em><em>, </em><a class="reference external" href="https://pandas.pydata.org/docs/reference/api/pandas.Index.html" title="pandas.Index"><em>Index</em></a>) – The values for the new index.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – The axis to update. The value 0 identifies the rows, and 1 identifies the columns.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Whether to return a new DeferredDataFrame instance.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><strong>renamed</strong> – An object of type DeferredDataFrame or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.rename_axis" title="apache_beam.dataframe.frames.DeferredDataFrame.rename_axis"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.rename_axis()</span></code></a></dt>
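<dd>Alter the name of the index or columns.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;A&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s2">&quot;B&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">]})</span>
<span class="go">Change the row labels.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">set_axis</span><span class="p">([</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go">   A  B</span>
<span class="go">a  1  4</span>
<span class="go">b  2  5</span>
<span class="go">c  3  6</span>
<span class="go">Change the column labels.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">set_axis</span><span class="p">([</span><span class="s1">&#39;I&#39;</span><span class="p">,</span> <span class="s1">&#39;II&#39;</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go">   I  II</span>
<span class="go">0  1   4</span>
<span class="go">1  2   5</span>
<span class="go">2  3   6</span>
<span class="go">Now, update the labels inplace.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">set_axis</span><span class="p">([</span><span class="s1">&#39;i&#39;</span><span class="p">,</span> <span class="s1">&#39;ii&#39;</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go">   i  ii</span>
<span class="go">0  1   4</span>
<span class="go">1  2   5</span>
<span class="go">2  3   6</span>
</pre></div>
</div>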
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.axes">
<code class="descname">axes</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.axes" title="Permalink to this definition"></a></dt>
<dd><p>Return a list representing the axes of the DataFrame.</p>
<p>It has the row axis labels and column axis labels as the only members.
They are returned in that order.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">axes</span>
<span class="go">[RangeIndex(start=0, stop=2, step=1), Index([&#39;col1&#39;, &#39;col2&#39;],</span>
<span class="go">dtype=&#39;object&#39;)]</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.dtypes">
<code class="descname">dtypes</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.dtypes" title="Permalink to this definition"></a></dt>
<dd><p>Return the dtypes in the DataFrame.</p>
<p>This returns a Series with the data type of each column.
The result’s index is the original DataFrame’s columns. Columns
with mixed types are stored with the <code class="docutils literal notranslate"><span class="pre">object</span></code> dtype. See
<a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/basics.html#basics-dtypes" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">the User Guide</span></a> for more.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">The data type of each column.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">pandas.Series</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;float&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mf">1.0</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;int&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;datetime&#39;</span><span class="p">:</span> <span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;20180310&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="s1">&#39;string&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;foo&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dtypes</span>
<span class="go">float float64</span>
<span class="go">int int64</span>
<span class="go">datetime datetime64[ns]</span>
<span class="go">string object</span>
<span class="go">dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.assign">
<code class="descname">assign</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.assign"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.assign" title="Permalink to this definition"></a></dt>
<dd><p>Assign new columns to a DataFrame.</p>
<p>Returns a new object with all original columns in addition to new ones.
Existing columns that are re-assigned will be overwritten.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>**kwargs</strong> (<em>dict of {str: callable</em><em> or </em><em>DeferredSeries}</em>) – The column names are keywords. If the values are
callable, they are computed on the DeferredDataFrame and
assigned to the new columns. The callable must not
change input DeferredDataFrame (though pandas doesn’t check it).
If the values are not callable, (e.g. a DeferredSeries, scalar, or array),
they are simply assigned.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A new DeferredDataFrame with the new columns in addition to
all the existing columns.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">value</span></code> must be a <code class="docutils literal notranslate"><span class="pre">callable</span></code> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredSeries</span></code></a>. Other types
make this operation order-sensitive.</p>
<p class="rubric">Notes</p>
<p>Assigning multiple columns within the same <code class="docutils literal notranslate"><span class="pre">assign</span></code> is possible.
Later items in ‘**kwargs’ may refer to newly created or modified
columns in ‘df’; items are computed and assigned into ‘df’ in order.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;temp_c&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mf">17.0</span><span class="p">,</span> <span class="mf">25.0</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;Portland&#39;</span><span class="p">,</span> <span class="s1">&#39;Berkeley&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> temp_c</span>
<span class="go">Portland 17.0</span>
<span class="go">Berkeley 25.0</span>
<span class="go">Where the value is a callable, evaluated on `df`:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">assign</span><span class="p">(</span><span class="n">temp_f</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">temp_c</span> <span class="o">*</span> <span class="mi">9</span> <span class="o">/</span> <span class="mi">5</span> <span class="o">+</span> <span class="mi">32</span><span class="p">)</span>
<span class="go"> temp_c temp_f</span>
<span class="go">Portland 17.0 62.6</span>
<span class="go">Berkeley 25.0 77.0</span>
<span class="go">Alternatively, the same behavior can be achieved by directly</span>
<span class="go">referencing an existing Series or sequence:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">assign</span><span class="p">(</span><span class="n">temp_f</span><span class="o">=</span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;temp_c&#39;</span><span class="p">]</span> <span class="o">*</span> <span class="mi">9</span> <span class="o">/</span> <span class="mi">5</span> <span class="o">+</span> <span class="mi">32</span><span class="p">)</span>
<span class="go"> temp_c temp_f</span>
<span class="go">Portland 17.0 62.6</span>
<span class="go">Berkeley 25.0 77.0</span>
<span class="go">You can create multiple columns within the same assign where one</span>
<span class="go">of the columns depends on another one defined within the same assign:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">assign</span><span class="p">(</span><span class="n">temp_f</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="s1">&#39;temp_c&#39;</span><span class="p">]</span> <span class="o">*</span> <span class="mi">9</span> <span class="o">/</span> <span class="mi">5</span> <span class="o">+</span> <span class="mi">32</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">temp_k</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="p">(</span><span class="n">x</span><span class="p">[</span><span class="s1">&#39;temp_f&#39;</span><span class="p">]</span> <span class="o">+</span> <span class="mf">459.67</span><span class="p">)</span> <span class="o">*</span> <span class="mi">5</span> <span class="o">/</span> <span class="mi">9</span><span class="p">)</span>
<span class="go"> temp_c temp_f temp_k</span>
<span class="go">Portland 17.0 62.6 290.15</span>
<span class="go">Berkeley 25.0 77.0 298.15</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.explode">
<code class="descname">explode</code><span class="sig-paren">(</span><em>column</em>, <em>ignore_index</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.explode"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.explode" title="Permalink to this definition"></a></dt>
<dd><p>Transform each element of a list-like to a row, replicating index values.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 0.25.0.</span></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>column</strong> (<em>IndexLabel</em>) – <p>Column(s) to explode.
For multiple columns, specify a non-empty list in which each element
is a str or tuple; the list-like data of all specified columns
on the same row of the frame must have matching length.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.3.0: </span>Multi-column explode</p>
</div>
</li>
<li><strong>ignore_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>If True, the resulting index will be labeled 0, 1, …, n - 1.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Exploded lists to rows of the subset columns;
index will be duplicated for these rows.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first"><em>ValueError</em> –</p>
<ul class="last simple">
<li>If columns of the frame are not unique.</li>
<li>If the specified columns to explode is an empty list.</li>
<li>If the specified columns to explode do not have a matching count of
elements row-wise in the frame.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.unstack" title="apache_beam.dataframe.frames.DeferredDataFrame.unstack"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.unstack()</span></code></a></dt>
<dd>Pivot a level of the (necessarily hierarchical) index labels.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.melt" title="apache_beam.dataframe.frames.DeferredDataFrame.melt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.melt()</span></code></a></dt>
<dd>Unpivot a DeferredDataFrame from wide format to long format.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.explode" title="apache_beam.dataframe.frames.DeferredSeries.explode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.explode()</span></code></a></dt>
<dd>Explode a DeferredDataFrame from list-like columns to long format.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>This routine will explode list-likes including lists, tuples, sets,
DeferredSeries, and np.ndarray. The result dtype of the subset rows will
be object. Scalars will be returned unchanged, and empty list-likes will
result in a np.nan for that row. In addition, the ordering of rows in the
output will be non-deterministic when exploding sets.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="p">[],</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">[[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">],</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="p">[],</span> <span class="p">[</span><span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">]]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B C</span>
<span class="go">0 [0, 1, 2] 1 [a, b, c]</span>
<span class="go">1 foo 1 NaN</span>
<span class="go">2 [] 1 []</span>
<span class="go">3 [3, 4] 1 [d, e]</span>
<span class="go">Single-column explode.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">explode</span><span class="p">(</span><span class="s1">&#39;A&#39;</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 0 1 [a, b, c]</span>
<span class="go">0 1 1 [a, b, c]</span>
<span class="go">0 2 1 [a, b, c]</span>
<span class="go">1 foo 1 NaN</span>
<span class="go">2 NaN 1 []</span>
<span class="go">3 3 1 [d, e]</span>
<span class="go">3 4 1 [d, e]</span>
<span class="go">Multi-column explode.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">explode</span><span class="p">(</span><span class="nb">list</span><span class="p">(</span><span class="s1">&#39;AC&#39;</span><span class="p">))</span>
<span class="go"> A B C</span>
<span class="go">0 0 1 a</span>
<span class="go">0 1 1 b</span>
<span class="go">0 2 1 c</span>
<span class="go">1 foo 1 NaN</span>
<span class="go">2 NaN 1 NaN</span>
<span class="go">3 3 1 d</span>
<span class="go">3 4 1 e</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.insert">
<code class="descname">insert</code><span class="sig-paren">(</span><em>value</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.insert"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.insert" title="Permalink to this definition"></a></dt>
<dd><p>Insert column into DataFrame at specified location.</p>
<p>Raises a ValueError if <cite>column</cite> is already contained in the DataFrame,
unless <cite>allow_duplicates</cite> is set to True.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>loc</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – Insertion index. Must verify 0 &lt;= loc &lt;= len(columns).</li>
<li><strong>column</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>number</em><em>, or </em><em>hashable object</em>) – Label of the inserted column.</li>
<li><strong>value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><em>array-like</em>) – </li>
<li><strong>allow_duplicates</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>optional</em>) – </li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">value</span></code> cannot be a <code class="docutils literal notranslate"><span class="pre">List</span></code> because aligning it with this
DeferredDataFrame is order-sensitive.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">Index.insert()</span></code></dt>
<dd>Insert new item by index.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> col1 col2</span>
<span class="go">0 1 3</span>
<span class="go">1 2 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;newcol&quot;</span><span class="p">,</span> <span class="p">[</span><span class="mi">99</span><span class="p">,</span> <span class="mi">99</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> col1 newcol col2</span>
<span class="go">0 1 99 3</span>
<span class="go">1 2 99 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;col1&quot;</span><span class="p">,</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span> <span class="n">allow_duplicates</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> col1 col1 newcol col2</span>
<span class="go">0 100 1 99 3</span>
<span class="go">1 100 2 99 4</span>
<span class="go">Notice that pandas uses index alignment when `value` is of type `Series`:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;col0&quot;</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> col0 col1 col1 newcol col2</span>
<span class="go">0 NaN 100 1 99 3</span>
<span class="go">1 5.0 100 2 99 4</span>
</pre></div>
</div>
</dd></dl>
<dl class="staticmethod">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.from_dict">
<em class="property">static </em><code class="descname">from_dict</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.from_dict"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.from_dict" title="Permalink to this definition"></a></dt>
<dd><p>Construct DataFrame from dict of array-like or dicts.</p>
<p>Creates DataFrame object from dictionary by columns or by index
allowing dtype specification.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>data</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a>) – Of the form {field : array-like} or {field : dict}.</li>
<li><strong>orient</strong> (<em>{'columns'</em><em>, </em><em>'index'}</em><em>, </em><em>default 'columns'</em>) – The “orientation” of the data. If the keys of the passed dict
should be the columns of the resulting DeferredDataFrame, pass ‘columns’
(default). Otherwise if the keys should be rows, pass ‘index’.</li>
<li><strong>dtype</strong> (<em>dtype</em><em>, </em><em>default None</em>) – Data type to force, otherwise infer.</li>
<li><strong>columns</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><em>default None</em>) – Column labels to use when <code class="docutils literal notranslate"><span class="pre">orient='index'</span></code>. Raises a ValueError
if used with <code class="docutils literal notranslate"><span class="pre">orient='columns'</span></code>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.from_records" title="apache_beam.dataframe.frames.DeferredDataFrame.from_records"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.from_records()</span></code></a></dt>
<dd>DeferredDataFrame from structured ndarray, sequence of tuples or dicts, or DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame()</span></code></a></dt>
<dd>DeferredDataFrame object creation using constructor.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">By default the keys of the dict become the DataFrame columns:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">data</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;col_1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;col_2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">]}</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="go"> col_1 col_2</span>
<span class="go">0 3 a</span>
<span class="go">1 2 b</span>
<span class="go">2 1 c</span>
<span class="go">3 0 d</span>
<span class="go">Specify ``orient=&#39;index&#39;`` to create the DataFrame using dictionary</span>
<span class="go">keys as rows:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">data</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;row_1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;row_2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">]}</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">orient</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> 0 1 2 3</span>
<span class="go">row_1 3 2 1 0</span>
<span class="go">row_2 a b c d</span>
<span class="go">When using the &#39;index&#39; orientation, the column names can be</span>
<span class="go">specified manually:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_dict</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">orient</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;D&#39;</span><span class="p">])</span>
<span class="go"> A B C D</span>
<span class="go">row_1 3 2 1 0</span>
<span class="go">row_2 a b c d</span>
</pre></div>
</div>
</dd></dl>
<dl class="staticmethod">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.from_records">
<em class="property">static </em><code class="descname">from_records</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.from_records"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.from_records" title="Permalink to this definition"></a></dt>
<dd><p>Convert structured or record ndarray to DataFrame.</p>
<p>Creates a DataFrame object from a structured ndarray, sequence of
tuples or dicts, or DataFrame.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>data</strong> (<em>structured ndarray</em><em>, </em><em>sequence of tuples</em><em> or </em><em>dicts</em><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Structured input data.</li>
<li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>list of fields</em><em>, </em><em>array-like</em>) – Field of array to use as the index, or alternatively a specific set of
input labels to use.</li>
<li><strong>exclude</strong> (<em>sequence</em><em>, </em><em>default None</em>) – Columns or fields to exclude.</li>
<li><strong>columns</strong> (<em>sequence</em><em>, </em><em>default None</em>) – Column names to use. If the passed data do not have names
associated with them, this argument provides names for the
columns. Otherwise this argument indicates the order of the columns
in the result (any names not found in the data will become all-NA
columns).</li>
<li><strong>coerce_float</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Attempt to convert values of non-string, non-numeric objects (like
decimal.Decimal) to floating point, useful for SQL result sets.</li>
<li><strong>nrows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default None</em>) – Number of rows to read if data is an iterator.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.from_dict" title="apache_beam.dataframe.frames.DeferredDataFrame.from_dict"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.from_dict()</span></code></a></dt>
<dd>DeferredDataFrame from dict of array-like or dicts.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame()</span></code></a></dt>
<dd>DeferredDataFrame object creation using constructor.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Data can be provided as a structured ndarray:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">data</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([(</span><span class="mi">3</span><span class="p">,</span> <span class="s1">&#39;a&#39;</span><span class="p">),</span> <span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">dtype</span><span class="o">=</span><span class="p">[(</span><span class="s1">&#39;col_1&#39;</span><span class="p">,</span> <span class="s1">&#39;i4&#39;</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;col_2&#39;</span><span class="p">,</span> <span class="s1">&#39;U1&#39;</span><span class="p">)])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_records</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="go"> col_1 col_2</span>
<span class="go">0 3 a</span>
<span class="go">1 2 b</span>
<span class="go">2 1 c</span>
<span class="go">3 0 d</span>
<span class="go">Data can be provided as a list of dicts:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">data</span> <span class="o">=</span> <span class="p">[{</span><span class="s1">&#39;col_1&#39;</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span> <span class="s1">&#39;col_2&#39;</span><span class="p">:</span> <span class="s1">&#39;a&#39;</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s1">&#39;col_1&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;col_2&#39;</span><span class="p">:</span> <span class="s1">&#39;b&#39;</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s1">&#39;col_1&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;col_2&#39;</span><span class="p">:</span> <span class="s1">&#39;c&#39;</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s1">&#39;col_1&#39;</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span> <span class="s1">&#39;col_2&#39;</span><span class="p">:</span> <span class="s1">&#39;d&#39;</span><span class="p">}]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_records</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="go"> col_1 col_2</span>
<span class="go">0 3 a</span>
<span class="go">1 2 b</span>
<span class="go">2 1 c</span>
<span class="go">3 0 d</span>
<span class="go">Data can be provided as a list of tuples with corresponding columns:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">data</span> <span class="o">=</span> <span class="p">[(</span><span class="mi">3</span><span class="p">,</span> <span class="s1">&#39;a&#39;</span><span class="p">),</span> <span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">)]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="o">.</span><span class="n">from_records</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;col_1&#39;</span><span class="p">,</span> <span class="s1">&#39;col_2&#39;</span><span class="p">])</span>
<span class="go"> col_1 col_2</span>
<span class="go">0 3 a</span>
<span class="go">1 2 b</span>
<span class="go">2 1 c</span>
<span class="go">3 0 d</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.duplicated">
<code class="descname">duplicated</code><span class="sig-paren">(</span><em>keep</em>, <em>subset</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.duplicated"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.duplicated" title="Permalink to this definition"></a></dt>
<dd><p>Return boolean Series denoting duplicate rows.</p>
<p>Considering certain columns is optional.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>subset</strong> (<em>column label</em><em> or </em><em>sequence of labels</em><em>, </em><em>optional</em>) – Only consider certain columns for identifying duplicates, by
default use all of the columns.</li>
<li><strong>keep</strong> (<em>{'first'</em><em>, </em><em>'last'</em><em>, </em><em>False}</em><em>, </em><em>default 'first'</em>) – <p>Determines which duplicates (if any) to mark.</p>
<ul>
<li><code class="docutils literal notranslate"><span class="pre">first</span></code> : Mark duplicates as <code class="docutils literal notranslate"><span class="pre">True</span></code> except for the first occurrence.</li>
<li><code class="docutils literal notranslate"><span class="pre">last</span></code> : Mark duplicates as <code class="docutils literal notranslate"><span class="pre">True</span></code> except for the last occurrence.</li>
<li>False : Mark all duplicates as <code class="docutils literal notranslate"><span class="pre">True</span></code>.</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Boolean series marking each duplicated row.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">keep=False</span></code> and <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> are supported. Other values of
<code class="docutils literal notranslate"><span class="pre">keep</span></code> make this an order-sensitive operation. Note <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> is
a Beam-specific option that guarantees only one duplicate will be kept, but
unlike <code class="docutils literal notranslate"><span class="pre">&quot;first&quot;</span></code> and <code class="docutils literal notranslate"><span class="pre">&quot;last&quot;</span></code> it makes no guarantees about <em>which</em>
duplicate element is kept.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">Index.duplicated()</span></code></dt>
<dd>Equivalent method on index.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.duplicated" title="apache_beam.dataframe.frames.DeferredSeries.duplicated"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.duplicated()</span></code></a></dt>
<dd>Equivalent method on DeferredSeries.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.drop_duplicates" title="apache_beam.dataframe.frames.DeferredSeries.drop_duplicates"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.drop_duplicates()</span></code></a></dt>
<dd>Remove duplicate values from DeferredSeries.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.drop_duplicates" title="apache_beam.dataframe.frames.DeferredDataFrame.drop_duplicates"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.drop_duplicates()</span></code></a></dt>
<dd>Remove duplicate values from DeferredDataFrame.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Consider a dataset containing ramen ratings.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span>
<span class="gp">... </span> <span class="s1">&#39;brand&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;Yum Yum&#39;</span><span class="p">,</span> <span class="s1">&#39;Yum Yum&#39;</span><span class="p">,</span> <span class="s1">&#39;Indomie&#39;</span><span class="p">,</span> <span class="s1">&#39;Indomie&#39;</span><span class="p">,</span> <span class="s1">&#39;Indomie&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;style&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;cup&#39;</span><span class="p">,</span> <span class="s1">&#39;cup&#39;</span><span class="p">,</span> <span class="s1">&#39;cup&#39;</span><span class="p">,</span> <span class="s1">&#39;pack&#39;</span><span class="p">,</span> <span class="s1">&#39;pack&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;rating&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mf">3.5</span><span class="p">,</span> <span class="mi">15</span><span class="p">,</span> <span class="mi">5</span><span class="p">]</span>
<span class="gp">... </span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> brand style rating</span>
<span class="go">0 Yum Yum cup 4.0</span>
<span class="go">1 Yum Yum cup 4.0</span>
<span class="go">2 Indomie cup 3.5</span>
<span class="go">3 Indomie pack 15.0</span>
<span class="go">4 Indomie pack 5.0</span>
<span class="go">By default, for each set of duplicated values, the first occurrence</span>
<span class="go">is set to False and all others to True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">duplicated</span><span class="p">()</span>
<span class="go">0 False</span>
<span class="go">1 True</span>
<span class="go">2 False</span>
<span class="go">3 False</span>
<span class="go">4 False</span>
<span class="go">dtype: bool</span>
<span class="go">By using &#39;last&#39;, the last occurrence of each set of duplicated values</span>
<span class="go">is set to False and all others to True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">duplicated</span><span class="p">(</span><span class="n">keep</span><span class="o">=</span><span class="s1">&#39;last&#39;</span><span class="p">)</span>
<span class="go">0 True</span>
<span class="go">1 False</span>
<span class="go">2 False</span>
<span class="go">3 False</span>
<span class="go">4 False</span>
<span class="go">dtype: bool</span>
<span class="go">By setting ``keep`` to False, all duplicates are True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">duplicated</span><span class="p">(</span><span class="n">keep</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">0 True</span>
<span class="go">1 True</span>
<span class="go">2 False</span>
<span class="go">3 False</span>
<span class="go">4 False</span>
<span class="go">dtype: bool</span>
<span class="go">To find duplicates on specific column(s), use ``subset``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">duplicated</span><span class="p">(</span><span class="n">subset</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;brand&#39;</span><span class="p">])</span>
<span class="go">0 False</span>
<span class="go">1 True</span>
<span class="go">2 False</span>
<span class="go">3 True</span>
<span class="go">4 True</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.drop_duplicates">
<code class="descname">drop_duplicates</code><span class="sig-paren">(</span><em>keep</em>, <em>subset</em>, <em>ignore_index</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.drop_duplicates"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.drop_duplicates" title="Permalink to this definition"></a></dt>
<dd><p>Return DataFrame with duplicate rows removed.</p>
<p>Considering certain columns is optional. Indexes, including time indexes,
are ignored.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>subset</strong> (<em>column label</em><em> or </em><em>sequence of labels</em><em>, </em><em>optional</em>) – Only consider certain columns for identifying duplicates, by
default use all of the columns.</li>
<li><strong>keep</strong> (<em>{'first'</em><em>, </em><em>'last'</em><em>, </em><em>False}</em><em>, </em><em>default 'first'</em>) – <p>Determines which duplicates (if any) to keep.</p>
<ul>
<li><code class="docutils literal notranslate"><span class="pre">first</span></code> : Drop duplicates except for the first occurrence.</li>
<li><code class="docutils literal notranslate"><span class="pre">last</span></code> : Drop duplicates except for the last occurrence.</li>
<li>False : Drop all duplicates.</li>
</ul>
</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Whether to drop duplicates in place or to return a copy.</li>
<li><strong>ignore_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>If True, the resulting axis will be labeled 0, 1, …, n - 1.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.0.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredDataFrame with duplicates removed or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">keep=False</span></code> and <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> are supported. Other values of
<code class="docutils literal notranslate"><span class="pre">keep</span></code> make this an order-sensitive operation. Note <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> is
a Beam-specific option that guarantees only one duplicate will be kept, but
unlike <code class="docutils literal notranslate"><span class="pre">&quot;first&quot;</span></code> and <code class="docutils literal notranslate"><span class="pre">&quot;last&quot;</span></code> it makes no guarantees about <em>which</em>
duplicate element is kept.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.value_counts" title="apache_beam.dataframe.frames.DeferredDataFrame.value_counts"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.value_counts()</span></code></a></dt>
<dd>Count unique combinations of columns.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Consider a dataset containing ramen ratings.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span>
<span class="gp">... </span> <span class="s1">&#39;brand&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;Yum Yum&#39;</span><span class="p">,</span> <span class="s1">&#39;Yum Yum&#39;</span><span class="p">,</span> <span class="s1">&#39;Indomie&#39;</span><span class="p">,</span> <span class="s1">&#39;Indomie&#39;</span><span class="p">,</span> <span class="s1">&#39;Indomie&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;style&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;cup&#39;</span><span class="p">,</span> <span class="s1">&#39;cup&#39;</span><span class="p">,</span> <span class="s1">&#39;cup&#39;</span><span class="p">,</span> <span class="s1">&#39;pack&#39;</span><span class="p">,</span> <span class="s1">&#39;pack&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;rating&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mf">3.5</span><span class="p">,</span> <span class="mi">15</span><span class="p">,</span> <span class="mi">5</span><span class="p">]</span>
<span class="gp">... </span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> brand style rating</span>
<span class="go">0 Yum Yum cup 4.0</span>
<span class="go">1 Yum Yum cup 4.0</span>
<span class="go">2 Indomie cup 3.5</span>
<span class="go">3 Indomie pack 15.0</span>
<span class="go">4 Indomie pack 5.0</span>
<span class="go">By default, it removes duplicate rows based on all columns.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop_duplicates</span><span class="p">()</span>
<span class="go"> brand style rating</span>
<span class="go">0 Yum Yum cup 4.0</span>
<span class="go">2 Indomie cup 3.5</span>
<span class="go">3 Indomie pack 15.0</span>
<span class="go">4 Indomie pack 5.0</span>
<span class="go">To remove duplicates on specific column(s), use ``subset``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop_duplicates</span><span class="p">(</span><span class="n">subset</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;brand&#39;</span><span class="p">])</span>
<span class="go"> brand style rating</span>
<span class="go">0 Yum Yum cup 4.0</span>
<span class="go">2 Indomie cup 3.5</span>
<span class="go">To remove duplicates and keep last occurrences, use ``keep``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop_duplicates</span><span class="p">(</span><span class="n">subset</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;brand&#39;</span><span class="p">,</span> <span class="s1">&#39;style&#39;</span><span class="p">],</span> <span class="n">keep</span><span class="o">=</span><span class="s1">&#39;last&#39;</span><span class="p">)</span>
<span class="go"> brand style rating</span>
<span class="go">1 Yum Yum cup 4.0</span>
<span class="go">2 Indomie cup 3.5</span>
<span class="go">4 Indomie pack 5.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.aggregate">
<code class="descname">aggregate</code><span class="sig-paren">(</span><em>func</em>, <em>axis</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.aggregate"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.aggregate" title="Permalink to this definition"></a></dt>
<dd><p>Aggregate using one or more operations over the specified axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>func</strong> (<em>function</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a>) – <p>Function to use for aggregating the data. If a function, must either
work when passed a DeferredDataFrame or when passed to DeferredDataFrame.apply.</p>
<p>Accepted combinations are:</p>
<ul>
<li>function</li>
<li>string function name</li>
<li>list of functions and/or function names, e.g. <code class="docutils literal notranslate"><span class="pre">[np.sum,</span> <span class="pre">'mean']</span></code></li>
<li>dict of axis labels -&gt; functions, function names or list of such.</li>
</ul>
</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – If 0 or ‘index’: apply function to each column.
If 1 or ‘columns’: apply function to each row.</li>
<li><strong>*args</strong> – Positional arguments to pass to <cite>func</cite>.</li>
<li><strong>**kwargs</strong> – Keyword arguments to pass to <cite>func</cite>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><em>scalar, DeferredSeries or DeferredDataFrame</em> – The return can be:</p>
<ul class="simple">
<li>scalar : when DeferredSeries.agg is called with a single function</li>
<li>DeferredSeries : when DeferredDataFrame.agg is called with a single function</li>
<li>DeferredDataFrame : when DeferredDataFrame.agg is called with several functions</li>
</ul>
<p>Return scalar, DeferredSeries or DeferredDataFrame.</p>
<p>The aggregation operations are always performed over an axis, either the
index (default) or the column axis. This behavior is different from
<cite>numpy</cite> aggregation functions (<cite>mean</cite>, <cite>median</cite>, <cite>prod</cite>, <cite>sum</cite>, <cite>std</cite>,
<cite>var</cite>), where the default is to compute the aggregation of the flattened
array, e.g., <code class="docutils literal notranslate"><span class="pre">numpy.mean(arr_2d)</span></code> as opposed to
<code class="docutils literal notranslate"><span class="pre">numpy.mean(arr_2d,</span> <span class="pre">axis=0)</span></code>.</p>
<p class="last"><cite>agg</cite> is an alias for <cite>aggregate</cite>. Use the alias.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.apply" title="apache_beam.dataframe.frames.DeferredDataFrame.apply"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.apply()</span></code></a></dt>
<dd>Perform any type of operations.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.transform" title="apache_beam.dataframe.frames.DeferredDataFrame.transform"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.transform()</span></code></a></dt>
<dd>Perform transformation type operations.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.groupby.GroupBy()</span></code></dt>
<dd>Perform operations over groups.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.resample.Resampler()</span></code></dt>
<dd>Perform operations over resampled bins.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.window.Rolling()</span></code></dt>
<dd>Perform operations over rolling window.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.window.Expanding()</span></code></dt>
<dd>Perform operations over expanding window.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.window.ExponentialMovingWindow()</span></code></dt>
<dd>Perform operation over exponential weighted window.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p><cite>agg</cite> is an alias for <cite>aggregate</cite>. Use the alias.</p>
<p>Functions that mutate the passed object can produce unexpected
behavior or errors and are not supported. See <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/gotchas.html#gotchas-udf-mutation" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span>Mutating with User Defined Function (UDF) methods</span></a>
for more details.</p>
<p>A passed user-defined-function will be passed a DeferredSeries for evaluation.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">])</span>
<span class="go">Aggregate these functions over the rows.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">agg</span><span class="p">([</span><span class="s1">&#39;sum&#39;</span><span class="p">,</span> <span class="s1">&#39;min&#39;</span><span class="p">])</span>
<span class="go"> A B C</span>
<span class="go">sum 12.0 15.0 18.0</span>
<span class="go">min 1.0 2.0 3.0</span>
<span class="go">Different aggregations per column.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">agg</span><span class="p">({</span><span class="s1">&#39;A&#39;</span> <span class="p">:</span> <span class="p">[</span><span class="s1">&#39;sum&#39;</span><span class="p">,</span> <span class="s1">&#39;min&#39;</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span> <span class="p">:</span> <span class="p">[</span><span class="s1">&#39;min&#39;</span><span class="p">,</span> <span class="s1">&#39;max&#39;</span><span class="p">]})</span>
<span class="go"> A B</span>
<span class="go">sum 12.0 NaN</span>
<span class="go">min 1.0 2.0</span>
<span class="go">max NaN 8.0</span>
<span class="go">Aggregate different functions over the columns and rename the index of the resulting</span>
<span class="go">DataFrame.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="n">x</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="nb">max</span><span class="p">),</span> <span class="n">y</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;min&#39;</span><span class="p">),</span> <span class="n">z</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">))</span>
<span class="go"> A B C</span>
<span class="go">x 7.0 NaN NaN</span>
<span class="go">y NaN 2.0 NaN</span>
<span class="go">z NaN NaN 6.0</span>
<span class="go">Aggregate over the columns.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="s2">&quot;mean&quot;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="s2">&quot;columns&quot;</span><span class="p">)</span>
<span class="go">0 2.0</span>
<span class="go">1 5.0</span>
<span class="go">2 8.0</span>
<span class="go">3 NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.agg">
<code class="descname">agg</code><span class="sig-paren">(</span><em>func</em>, <em>axis</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.agg" title="Permalink to this definition"></a></dt>
<dd><p>Aggregate using one or more operations over the specified axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>func</strong> (<em>function</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a>) – <p>Function to use for aggregating the data. If a function, must either
work when passed a DeferredDataFrame or when passed to DeferredDataFrame.apply.</p>
<p>Accepted combinations are:</p>
<ul>
<li>function</li>
<li>string function name</li>
<li>list of functions and/or function names, e.g. <code class="docutils literal notranslate"><span class="pre">[np.sum,</span> <span class="pre">'mean']</span></code></li>
<li>dict of axis labels -&gt; functions, function names or list of such.</li>
</ul>
</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – If 0 or ‘index’: apply function to each column.
If 1 or ‘columns’: apply function to each row.</li>
<li><strong>*args</strong> – Positional arguments to pass to <cite>func</cite>.</li>
<li><strong>**kwargs</strong> – Keyword arguments to pass to <cite>func</cite>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><em>scalar, DeferredSeries or DeferredDataFrame</em> – The return can be:</p>
<ul class="simple">
<li>scalar : when DeferredSeries.agg is called with a single function</li>
<li>DeferredSeries : when DeferredDataFrame.agg is called with a single function</li>
<li>DeferredDataFrame : when DeferredDataFrame.agg is called with several functions</li>
</ul>
<p>Return scalar, DeferredSeries or DeferredDataFrame.</p>
<p>The aggregation operations are always performed over an axis, either the
index (default) or the column axis. This behavior is different from
<cite>numpy</cite> aggregation functions (<cite>mean</cite>, <cite>median</cite>, <cite>prod</cite>, <cite>sum</cite>, <cite>std</cite>,
<cite>var</cite>), where the default is to compute the aggregation of the flattened
array, e.g., <code class="docutils literal notranslate"><span class="pre">numpy.mean(arr_2d)</span></code> as opposed to
<code class="docutils literal notranslate"><span class="pre">numpy.mean(arr_2d,</span> <span class="pre">axis=0)</span></code>.</p>
<p class="last"><cite>agg</cite> is an alias for <cite>aggregate</cite>. Use the alias.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.apply" title="apache_beam.dataframe.frames.DeferredDataFrame.apply"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.apply()</span></code></a></dt>
<dd>Perform any type of operations.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.transform" title="apache_beam.dataframe.frames.DeferredDataFrame.transform"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.transform()</span></code></a></dt>
<dd>Perform transformation type operations.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.groupby.GroupBy()</span></code></dt>
<dd>Perform operations over groups.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.resample.Resampler()</span></code></dt>
<dd>Perform operations over resampled bins.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.window.Rolling()</span></code></dt>
<dd>Perform operations over rolling window.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.window.Expanding()</span></code></dt>
<dd>Perform operations over expanding window.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.window.ExponentialMovingWindow()</span></code></dt>
<dd>Perform operation over exponential weighted window.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p><cite>agg</cite> is an alias for <cite>aggregate</cite>. Use the alias.</p>
<p>Functions that mutate the passed object can produce unexpected
behavior or errors and are not supported. See <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/gotchas.html#gotchas-udf-mutation" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span>Mutating with User Defined Function (UDF) methods</span></a>
for more details.</p>
<p>A passed user-defined-function will be passed a DeferredSeries for evaluation.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">])</span>
<span class="go">Aggregate these functions over the rows.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">agg</span><span class="p">([</span><span class="s1">&#39;sum&#39;</span><span class="p">,</span> <span class="s1">&#39;min&#39;</span><span class="p">])</span>
<span class="go"> A B C</span>
<span class="go">sum 12.0 15.0 18.0</span>
<span class="go">min 1.0 2.0 3.0</span>
<span class="go">Different aggregations per column.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">agg</span><span class="p">({</span><span class="s1">&#39;A&#39;</span> <span class="p">:</span> <span class="p">[</span><span class="s1">&#39;sum&#39;</span><span class="p">,</span> <span class="s1">&#39;min&#39;</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span> <span class="p">:</span> <span class="p">[</span><span class="s1">&#39;min&#39;</span><span class="p">,</span> <span class="s1">&#39;max&#39;</span><span class="p">]})</span>
<span class="go"> A B</span>
<span class="go">sum 12.0 NaN</span>
<span class="go">min 1.0 2.0</span>
<span class="go">max NaN 8.0</span>
<span class="go">Aggregate different functions over the columns and rename the index of the resulting</span>
<span class="go">DataFrame.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="n">x</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="nb">max</span><span class="p">),</span> <span class="n">y</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;min&#39;</span><span class="p">),</span> <span class="n">z</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">))</span>
<span class="go"> A B C</span>
<span class="go">x 7.0 NaN NaN</span>
<span class="go">y NaN 2.0 NaN</span>
<span class="go">z NaN NaN 6.0</span>
<span class="go">Aggregate over the columns.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">agg</span><span class="p">(</span><span class="s2">&quot;mean&quot;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="s2">&quot;columns&quot;</span><span class="p">)</span>
<span class="go">0 2.0</span>
<span class="go">1 5.0</span>
<span class="go">2 8.0</span>
<span class="go">3 NaN</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.applymap">
<code class="descname">applymap</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.applymap" title="Permalink to this definition"></a></dt>
<dd><p>Apply a function to a DataFrame elementwise.</p>
<p>This method applies a function that accepts and returns a scalar
to every element of a DataFrame.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>func</strong> (<em>callable</em>) – Python function, returns a single value from a single value.</li>
<li><strong>na_action</strong> (<em>{None</em><em>, </em><em>'ignore'}</em><em>, </em><em>default None</em>) – <p>If ‘ignore’, propagate NaN values, without passing them to func.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.2.</span></p>
</div>
</li>
<li><strong>**kwargs</strong> – <p>Additional keyword arguments to pass as keyword arguments to
<cite>func</cite>.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.3.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Transformed DeferredDataFrame.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.apply" title="apache_beam.dataframe.frames.DeferredDataFrame.apply"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.apply()</span></code></a></dt>
<dd>Apply a function along input axis of DeferredDataFrame.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mf">2.12</span><span class="p">],</span> <span class="p">[</span><span class="mf">3.356</span><span class="p">,</span> <span class="mf">4.567</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> 0 1</span>
<span class="go">0 1.000 2.120</span>
<span class="go">1 3.356 4.567</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">applymap</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">x</span><span class="p">)))</span>
<span class="go"> 0 1</span>
<span class="go">0 3 4</span>
<span class="go">1 5 5</span>
<span class="go">Like Series.map, NA values can be ignored:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_copy</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_copy</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">NA</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_copy</span><span class="o">.</span><span class="n">applymap</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="nb">len</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">x</span><span class="p">)),</span> <span class="n">na_action</span><span class="o">=</span><span class="s1">&#39;ignore&#39;</span><span class="p">)</span>
<span class="go">      0  1</span>
<span class="go">0  &lt;NA&gt;  4</span>
<span class="go">1     5  5</span>
</pre></div>
</div>
<p>Note that a vectorized version of <cite>func</cite> often exists, which will
be much faster. You could square each number elementwise.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">applymap</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">**</span><span class="mi">2</span><span class="p">)</span>
<span class="go">           0          1</span>
<span class="go">0   1.000000   4.494400</span>
<span class="go">1  11.262736  20.857489</span>
</pre></div>
</div>
<p>But it’s better to avoid applymap in that case.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">**</span> <span class="mi">2</span>
<span class="go">           0          1</span>
<span class="go">0   1.000000   4.494400</span>
<span class="go">1  11.262736  20.857489</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.add_prefix">
<code class="descname">add_prefix</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.add_prefix" title="Permalink to this definition"></a></dt>
<dd><p>Prefix labels with string <cite>prefix</cite>.</p>
<p>For Series, the row labels are prefixed.
For DataFrame, the column labels are prefixed.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>prefix</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – The string to add before each label.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">New DeferredSeries or DeferredDataFrame with updated labels.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.add_suffix" title="apache_beam.dataframe.frames.DeferredSeries.add_suffix"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.add_suffix()</span></code></a></dt>
<dd>Suffix row labels with string <cite>suffix</cite>.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add_suffix" title="apache_beam.dataframe.frames.DeferredDataFrame.add_suffix"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add_suffix()</span></code></a></dt>
<dd>Suffix column labels with string <cite>suffix</cite>.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0    1</span>
<span class="go">1    2</span>
<span class="go">2    3</span>
<span class="go">3    4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">add_prefix</span><span class="p">(</span><span class="s1">&#39;item_&#39;</span><span class="p">)</span>
<span class="go">item_0    1</span>
<span class="go">item_1    2</span>
<span class="go">item_2    3</span>
<span class="go">item_3    4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go">   A  B</span>
<span class="go">0  1  3</span>
<span class="go">1  2  4</span>
<span class="go">2  3  5</span>
<span class="go">3  4  6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add_prefix</span><span class="p">(</span><span class="s1">&#39;col_&#39;</span><span class="p">)</span>
<span class="go">   col_A  col_B</span>
<span class="go">0      1      3</span>
<span class="go">1      2      4</span>
<span class="go">2      3      5</span>
<span class="go">3      4      6</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.add_suffix">
<code class="descname">add_suffix</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.add_suffix" title="Permalink to this definition"></a></dt>
<dd><p>Suffix labels with string <cite>suffix</cite>.</p>
<p>For Series, the row labels are suffixed.
For DataFrame, the column labels are suffixed.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>suffix</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – The string to add after each label.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">New DeferredSeries or DeferredDataFrame with updated labels.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.add_prefix" title="apache_beam.dataframe.frames.DeferredSeries.add_prefix"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.add_prefix()</span></code></a></dt>
<dd>Prefix row labels with string <cite>prefix</cite>.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add_prefix" title="apache_beam.dataframe.frames.DeferredDataFrame.add_prefix"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add_prefix()</span></code></a></dt>
<dd>Prefix column labels with string <cite>prefix</cite>.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0    1</span>
<span class="go">1    2</span>
<span class="go">2    3</span>
<span class="go">3    4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">add_suffix</span><span class="p">(</span><span class="s1">&#39;_item&#39;</span><span class="p">)</span>
<span class="go">0_item    1</span>
<span class="go">1_item    2</span>
<span class="go">2_item    3</span>
<span class="go">3_item    4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go">   A  B</span>
<span class="go">0  1  3</span>
<span class="go">1  2  4</span>
<span class="go">2  3  5</span>
<span class="go">3  4  6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add_suffix</span><span class="p">(</span><span class="s1">&#39;_col&#39;</span><span class="p">)</span>
<span class="go">   A_col  B_col</span>
<span class="go">0      1      3</span>
<span class="go">1      2      4</span>
<span class="go">2      3      5</span>
<span class="go">3      4      6</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.memory_usage">
<code class="descname">memory_usage</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.memory_usage" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.memory_usage.html#pandas.DataFrame.memory_usage" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.memory_usage()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.info">
<code class="descname">info</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.info" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.info.html#pandas.DataFrame.info" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.info()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.clip">
<code class="descname">clip</code><span class="sig-paren">(</span><em>axis</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.clip"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.clip" title="Permalink to this definition"></a></dt>
<dd><p><code class="docutils literal notranslate"><span class="pre">lower</span></code> and <code class="docutils literal notranslate"><span class="pre">upper</span></code> must be <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredSeries</span></code></a> instances, or
constants. Array-like arguments are not supported because they are
order-sensitive.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.corr">
<code class="descname">corr</code><span class="sig-paren">(</span><em>method</em>, <em>min_periods</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.corr"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.corr" title="Permalink to this definition"></a></dt>
<dd><p>Compute pairwise correlation of columns, excluding NA/null values.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>method</strong> (<em>{'pearson'</em><em>, </em><em>'kendall'</em><em>, </em><em>'spearman'}</em><em> or </em><em>callable</em>) – <p>Method of correlation:</p>
<ul>
<li>pearson : standard correlation coefficient</li>
<li>kendall : Kendall Tau correlation coefficient</li>
<li>spearman : Spearman rank correlation</li>
<li>callable : callable with input two 1d ndarrays
and returning a float. Note that the returned matrix from corr
will have 1 along the diagonals and will be symmetric
regardless of the callable’s behavior.</li>
</ul>
</li>
<li><strong>min_periods</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – Minimum number of observations required per pair of columns
to have a valid result. Currently only available for Pearson
and Spearman correlation.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Correlation matrix.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">method=&quot;pearson&quot;</span></code> can be parallelized. Other methods require
collecting all data on a single worker (see
<a class="reference external" href="https://s.apache.org/dataframe-non-parallel-operations">https://s.apache.org/dataframe-non-parallel-operations</a> for details).</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.corrwith" title="apache_beam.dataframe.frames.DeferredDataFrame.corrwith"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.corrwith()</span></code></a></dt>
<dd>Compute pairwise correlation with another DeferredDataFrame or DeferredSeries.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.corr" title="apache_beam.dataframe.frames.DeferredSeries.corr"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.corr()</span></code></a></dt>
<dd>Compute the correlation between two DeferredSeries.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="k">def</span> <span class="nf">histogram_intersection</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">):</span>
<span class="gp">... </span>    <span class="n">v</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">minimum</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="n">decimals</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="gp">... </span>    <span class="k">return</span> <span class="n">v</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([(</span><span class="mf">.2</span><span class="p">,</span> <span class="mf">.3</span><span class="p">),</span> <span class="p">(</span><span class="mf">.0</span><span class="p">,</span> <span class="mf">.6</span><span class="p">),</span> <span class="p">(</span><span class="mf">.6</span><span class="p">,</span> <span class="mf">.0</span><span class="p">),</span> <span class="p">(</span><span class="mf">.2</span><span class="p">,</span> <span class="mf">.1</span><span class="p">)],</span>
<span class="gp">... </span>                  <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;dogs&#39;</span><span class="p">,</span> <span class="s1">&#39;cats&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">corr</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="n">histogram_intersection</span><span class="p">)</span>
<span class="go">      dogs  cats</span>
<span class="go">dogs   1.0   0.3</span>
<span class="go">cats   0.3   1.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.cov">
<code class="descname">cov</code><span class="sig-paren">(</span><em>min_periods</em>, <em>ddof</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.cov"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.cov" title="Permalink to this definition"></a></dt>
<dd><p>Compute pairwise covariance of columns, excluding NA/null values.</p>
<p>Compute the pairwise covariance among the series of a DataFrame.
The returned data frame is the <a class="reference external" href="https://en.wikipedia.org/wiki/Covariance_matrix">covariance matrix</a> of the columns
of the DataFrame.</p>
<p>Both NA and null values are automatically excluded from the
calculation. (See the note below about bias from missing values.)
A threshold can be set for the minimum number of
observations for each value created. Comparisons with observations
below this threshold will be returned as <code class="docutils literal notranslate"><span class="pre">NaN</span></code>.</p>
<p>This method is generally used for the analysis of time series data to
understand the relationship between different measures
across time.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>min_periods</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – Minimum number of observations required per pair of columns
to have a valid result.</li>
<li><strong>ddof</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 1</em>) – <p>Delta degrees of freedom. The divisor used in calculations
is <code class="docutils literal notranslate"><span class="pre">N</span> <span class="pre">-</span> <span class="pre">ddof</span></code>, where <code class="docutils literal notranslate"><span class="pre">N</span></code> represents the number of elements.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The covariance matrix of the series of the DeferredDataFrame.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.cov" title="apache_beam.dataframe.frames.DeferredSeries.cov"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.cov()</span></code></a></dt>
<dd>Compute covariance with another DeferredSeries.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.window.ExponentialMovingWindow.cov()</span></code></dt>
<dd>Exponential weighted sample covariance.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.window.Expanding.cov()</span></code></dt>
<dd>Expanding sample covariance.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.window.Rolling.cov()</span></code></dt>
<dd>Rolling sample covariance.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Returns the covariance matrix of the DeferredDataFrame’s time series.
The covariance is normalized by N-ddof.</p>
<p>For DeferredDataFrames that have DeferredSeries that are missing data (assuming that
data is <a class="reference external" href="https://en.wikipedia.org/wiki/Missing_data#Missing_at_random">missing at random</a>)
the returned covariance matrix will be an unbiased estimate
of the variance and covariance between the member DeferredSeries.</p>
<p>However, for many applications this estimate may not be acceptable
because the estimated covariance matrix is not guaranteed to be positive
semi-definite. This could lead to estimated correlations having
absolute values which are greater than one, and/or a non-invertible
covariance matrix. See <a class="reference external" href="https://en.wikipedia.org/w/index.php?title=Estimation_of_covariance_matrices">Estimation of covariance matrices</a> for more details.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span> <span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">),</span> <span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span> <span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">)],</span>
<span class="gp">... </span>                  <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;dogs&#39;</span><span class="p">,</span> <span class="s1">&#39;cats&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">cov</span><span class="p">()</span>
<span class="go">          dogs      cats</span>
<span class="go">dogs  0.666667 -1.000000</span>
<span class="go">cats -1.000000  1.666667</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">42</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">1000</span><span class="p">,</span> <span class="mi">5</span><span class="p">),</span>
<span class="gp">... </span>                  <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">cov</span><span class="p">()</span>
<span class="go">          a         b         c         d         e</span>
<span class="go">a  0.998438 -0.020161  0.059277 -0.008943  0.014144</span>
<span class="go">b -0.020161  1.059352 -0.008543 -0.024738  0.009826</span>
<span class="go">c  0.059277 -0.008543  1.010670 -0.001486 -0.000271</span>
<span class="go">d -0.008943 -0.024738 -0.001486  0.921297 -0.013692</span>
<span class="go">e  0.014144  0.009826 -0.000271 -0.013692  0.977795</span>
</pre></div>
</div>
<p><strong>Minimum number of periods</strong></p>
<p>This method also supports an optional <code class="docutils literal notranslate"><span class="pre">min_periods</span></code> keyword
that specifies the required minimum number of non-NA observations for
each column pair in order to have a valid result:</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">42</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">randn</span><span class="p">(</span><span class="mi">20</span><span class="p">,</span> <span class="mi">3</span><span class="p">),</span>
<span class="gp">... </span>                  <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="p">[:</span><span class="mi">5</span><span class="p">],</span> <span class="s1">&#39;a&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">index</span><span class="p">[</span><span class="mi">5</span><span class="p">:</span><span class="mi">10</span><span class="p">],</span> <span class="s1">&#39;b&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">cov</span><span class="p">(</span><span class="n">min_periods</span><span class="o">=</span><span class="mi">12</span><span class="p">)</span>
<span class="go">          a         b         c</span>
<span class="go">a  0.316741       NaN -0.150812</span>
<span class="go">b       NaN  1.248003  0.191417</span>
<span class="go">c -0.150812  0.191417  0.895202</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.corrwith">
<code class="descname">corrwith</code><span class="sig-paren">(</span><em>other</em>, <em>axis</em>, <em>drop</em>, <em>method</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.corrwith"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.corrwith" title="Permalink to this definition"></a></dt>
<dd><p>Compute pairwise correlation.</p>
<p>Pairwise correlation is computed between rows or columns of
DataFrame with rows or columns of Series or DataFrame. DataFrames
are first aligned along both axes before computing the
correlations.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a>) – Object with which to compute correlations.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – The axis to use. 0 or ‘index’ to compute column-wise, 1 or ‘columns’ for
row-wise.</li>
<li><strong>drop</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Drop missing indices from result.</li>
<li><strong>method</strong> (<em>{'pearson'</em><em>, </em><em>'kendall'</em><em>, </em><em>'spearman'}</em><em> or </em><em>callable</em>) – <p>Method of correlation:</p>
<ul>
<li>pearson : standard correlation coefficient</li>
<li>kendall : Kendall Tau correlation coefficient</li>
<li>spearman : Spearman rank correlation</li>
<li><dl class="first docutils">
<dt>callable: callable with input two 1d ndarrays</dt>
<dd>and returning a float.</dd>
</dl>
</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Pairwise correlations.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.corr" title="apache_beam.dataframe.frames.DeferredDataFrame.corr"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.corr()</span></code></a></dt>
<dd>Compute pairwise correlation of columns.</dd>
</dl>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.cummax">
<code class="descname">cummax</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.cummax" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.cummax.html#pandas.DataFrame.cummax" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.cummax()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.cummin">
<code class="descname">cummin</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.cummin" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.cummin.html#pandas.DataFrame.cummin" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.cummin()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.cumprod">
<code class="descname">cumprod</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.cumprod" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.cumprod.html#pandas.DataFrame.cumprod" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.cumprod()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.cumsum">
<code class="descname">cumsum</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.cumsum" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.cumsum.html#pandas.DataFrame.cumsum" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.cumsum()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.diff">
<code class="descname">diff</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.diff" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.diff.html#pandas.DataFrame.diff" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.diff()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.interpolate">
<code class="descname">interpolate</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.interpolate" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.interpolate.html#pandas.DataFrame.interpolate" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.interpolate()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.pct_change">
<code class="descname">pct_change</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.pct_change" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.pct_change.html#pandas.DataFrame.pct_change" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.pct_change()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.asof">
<code class="descname">asof</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.asof" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.asof.html#pandas.DataFrame.asof" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.asof()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.first_valid_index">
<code class="descname">first_valid_index</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.first_valid_index" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.first_valid_index.html#pandas.DataFrame.first_valid_index" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.first_valid_index()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.last_valid_index">
<code class="descname">last_valid_index</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.last_valid_index" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.last_valid_index.html#pandas.DataFrame.last_valid_index" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.last_valid_index()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.iat">
<code class="descname">iat</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.iat" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.iat()</span></code> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.lookup">
<code class="descname">lookup</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.lookup" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.lookup.html#pandas.DataFrame.lookup" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.lookup()</span></code></a> is not yet supported in the Beam DataFrame API because it is deprecated in pandas.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.head">
<code class="descname">head</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.head" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.head.html#pandas.DataFrame.head" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.head()</span></code></a> is not yet supported in the Beam DataFrame API because it is <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">order-sensitive</a>.</p>
<p>If you want to peek at a large dataset consider using interactive Beam’s <a class="reference internal" href="apache_beam.runners.interactive.interactive_beam.html#apache_beam.runners.interactive.interactive_beam.collect" title="apache_beam.runners.interactive.interactive_beam.collect"><code class="xref py py-func docutils literal notranslate"><span class="pre">ib.collect</span></code></a> with <code class="docutils literal notranslate"><span class="pre">n</span></code> specified, or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sample" title="apache_beam.dataframe.frames.DeferredDataFrame.sample"><code class="xref py py-meth docutils literal notranslate"><span class="pre">sample()</span></code></a>. If you want to find the N largest elements, consider using <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.nlargest" title="apache_beam.dataframe.frames.DeferredDataFrame.nlargest"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.nlargest()</span></code></a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.tail">
<code class="descname">tail</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.tail" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.tail.html#pandas.DataFrame.tail" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.tail()</span></code></a> is not yet supported in the Beam DataFrame API because it is <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">order-sensitive</a>.</p>
<p>If you want to peek at a large dataset consider using interactive Beam’s <a class="reference internal" href="apache_beam.runners.interactive.interactive_beam.html#apache_beam.runners.interactive.interactive_beam.collect" title="apache_beam.runners.interactive.interactive_beam.collect"><code class="xref py py-func docutils literal notranslate"><span class="pre">ib.collect</span></code></a> with <code class="docutils literal notranslate"><span class="pre">n</span></code> specified, or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sample" title="apache_beam.dataframe.frames.DeferredDataFrame.sample"><code class="xref py py-meth docutils literal notranslate"><span class="pre">sample()</span></code></a>. If you want to find the N largest elements, consider using <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.nlargest" title="apache_beam.dataframe.frames.DeferredDataFrame.nlargest"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.nlargest()</span></code></a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.sample">
<code class="descname">sample</code><span class="sig-paren">(</span><em>n</em>, <em>frac</em>, <em>replace</em>, <em>weights</em>, <em>random_state</em>, <em>axis</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.sample"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.sample" title="Permalink to this definition"></a></dt>
<dd><p>Return a random sample of items from an axis of object.</p>
<p>You can use <cite>random_state</cite> for reproducibility.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>n</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – Number of items from axis to return. Cannot be used with <cite>frac</cite>.
Default = 1 if <cite>frac</cite> = None.</li>
<li><strong>frac</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em>, </em><em>optional</em>) – Fraction of axis items to return. Cannot be used with <cite>n</cite>.</li>
<li><strong>replace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Allow or disallow sampling of the same row more than once.</li>
<li><strong>weights</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>ndarray-like</em><em>, </em><em>optional</em>) – Default ‘None’ results in equal probability weighting.
If passed a DeferredSeries, will align with target object on index. Index
values in weights not found in sampled object will be ignored and
index values in sampled object not in weights will be assigned
weights of zero.
If called on a DeferredDataFrame, will accept the name of a column
when axis = 0.
Unless weights are a DeferredSeries, weights must be same length as axis
being sampled.
If weights do not sum to 1, they will be normalized to sum to 1.
Missing values in the weights column will be treated as zero.
Infinite values not allowed.</li>
<li><strong>random_state</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>array-like</em><em>, </em><em>BitGenerator</em><em>, </em><em>np.random.RandomState</em><em>, </em><em>optional</em>) – <p>If int, array-like, or BitGenerator (NumPy&gt;=1.17), seed for
random number generator.
If np.random.RandomState, use as numpy RandomState object.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.1.0: </span>array-like and BitGenerator (for NumPy&gt;=1.17) object now passed to
np.random.RandomState() as seed</p>
</div>
</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>‘index’</em><em>, </em><em>1</em><em> or </em><em>‘columns’</em><em>, </em><em>None}</em><em>, </em><em>default None</em>) – Axis to sample. Accepts axis number or name. Default is stat axis
for given data type (0 for DeferredSeries and DeferredDataFrames).</li>
<li><strong>ignore_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>If True, the resulting index will be labeled 0, 1, …, n - 1.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.3.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A new object of same type as caller containing <cite>n</cite> items randomly
sampled from the caller object.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>When <code class="docutils literal notranslate"><span class="pre">axis='index'</span></code>, only <code class="docutils literal notranslate"><span class="pre">n</span></code> and/or <code class="docutils literal notranslate"><span class="pre">weights</span></code> may be specified.
<code class="docutils literal notranslate"><span class="pre">frac</span></code>, <code class="docutils literal notranslate"><span class="pre">random_state</span></code>, and <code class="docutils literal notranslate"><span class="pre">replace=True</span></code> are not yet supported.
See <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-12476">BEAM-12476</a>.</p>
<p>Note that pandas will raise an error if <code class="docutils literal notranslate"><span class="pre">n</span></code> is larger than the length
of the dataset, while the Beam DataFrame API will simply return the full
dataset in that case.</p>
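<p><code class="docutils literal notranslate"><span class="pre">sample</span></code> is fully supported for <code class="docutils literal notranslate"><span class="pre">axis='columns'</span></code>.</p>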
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrameGroupBy.sample()</span></code></dt>
<dd>Generates random samples from each group of a DeferredDataFrame object.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeriesGroupBy.sample()</span></code></dt>
<dd>Generates random samples from each group of a DeferredSeries object.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.random.choice()</span></code></dt>
<dd>Generates a random sample from a given 1-D numpy array.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>If <cite>frac</cite> &gt; 1, <cite>replace</cite> should be set to <cite>True</cite>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;num_legs&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;num_wings&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;num_specimen_seen&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">10</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">8</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="s1">&#39;spider&#39;</span><span class="p">,</span> <span class="s1">&#39;fish&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> num_legs num_wings num_specimen_seen</span>
<span class="go">falcon 2 2 10</span>
<span class="go">dog 4 0 2</span>
<span class="go">spider 8 0 1</span>
<span class="go">fish 0 0 8</span>
<span class="go">Extract 3 random elements from the ``Series`` ``df[&#39;num_legs&#39;]``:</span>
<span class="go">Note that we use `random_state` to ensure the reproducibility of</span>
<span class="go">the examples.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;num_legs&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">n</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">fish 0</span>
<span class="go">spider 8</span>
<span class="go">falcon 2</span>
<span class="go">Name: num_legs, dtype: int64</span>
<span class="go">A random 50% sample of the ``DataFrame`` with replacement:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">frac</span><span class="o">=</span><span class="mf">0.5</span><span class="p">,</span> <span class="n">replace</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> num_legs num_wings num_specimen_seen</span>
<span class="go">dog 4 0 2</span>
<span class="go">fish 0 0 8</span>
<span class="go">An upsample sample of the ``DataFrame`` with replacement:</span>
<span class="go">Note that `replace` parameter has to be `True` for `frac` parameter &gt; 1.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">frac</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">replace</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> num_legs num_wings num_specimen_seen</span>
<span class="go">dog 4 0 2</span>
<span class="go">fish 0 0 8</span>
<span class="go">falcon 2 2 10</span>
<span class="go">falcon 2 2 10</span>
<span class="go">fish 0 0 8</span>
<span class="go">dog 4 0 2</span>
<span class="go">fish 0 0 8</span>
<span class="go">dog 4 0 2</span>
<span class="go">Using a DataFrame column as weights. Rows with larger value in the</span>
<span class="go">`num_specimen_seen` column are more likely to be sampled.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">n</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">weights</span><span class="o">=</span><span class="s1">&#39;num_specimen_seen&#39;</span><span class="p">,</span> <span class="n">random_state</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> num_legs num_wings num_specimen_seen</span>
<span class="go">falcon 2 2 10</span>
<span class="go">fish 0 0 8</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.dot">
<code class="descname">dot</code><span class="sig-paren">(</span><em>other</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.dot"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.dot" title="Permalink to this definition"></a></dt>
<dd><p>Compute the matrix multiplication between the DataFrame and other.</p>
<p>This method computes the matrix product between the DataFrame and the
values of another Series, DataFrame or a numpy array.</p>
<p>It can also be called using <code class="docutils literal notranslate"><span class="pre">self</span> <span class="pre">&#64;</span> <span class="pre">other</span></code> in Python &gt;= 3.5.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a><em> or </em><em>array-like</em>) – The other object to compute the matrix product with.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">If other is a DeferredSeries, return the matrix product between self and
other as a DeferredSeries. If other is a DeferredDataFrame or a numpy.array, return
the matrix product of self and other in a DeferredDataFrame or a np.array.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.dot" title="apache_beam.dataframe.frames.DeferredSeries.dot"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.dot()</span></code></a></dt>
<dd>Similar method for DeferredSeries.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>The dimensions of DeferredDataFrame and other must be compatible in order to
compute the matrix multiplication. In addition, the column names of
DeferredDataFrame and the index of other must contain the same values, as they
will be aligned prior to the multiplication.</p>
<p>The dot method for DeferredSeries computes the inner product, instead of the
matrix product here.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Here we multiply a DataFrame with a Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">2</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
<span class="go">0 -4</span>
<span class="go">1 5</span>
<span class="go">dtype: int64</span>
<span class="go">Here we multiply a DataFrame with another DataFrame.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<span class="go"> 0 1</span>
<span class="go">0 1 4</span>
<span class="go">1 2 2</span>
<span class="go">Note that the dot method gives the same result as @</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">@</span> <span class="n">other</span>
<span class="go"> 0 1</span>
<span class="go">0 1 4</span>
<span class="go">1 2 2</span>
<span class="go">The dot method works also if other is an np.array.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">arr</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">arr</span><span class="p">)</span>
<span class="go"> 0 1</span>
<span class="go">0 1 4</span>
<span class="go">1 2 2</span>
<span class="go">Note how shuffling of the objects does not change the result.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s2</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">reindex</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dot</span><span class="p">(</span><span class="n">s2</span><span class="p">)</span>
<span class="go">0 -4</span>
<span class="go">1 5</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.mode">
<code class="descname">mode</code><span class="sig-paren">(</span><em>axis=0</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.mode"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.mode" title="Permalink to this definition"></a></dt>
<dd><p>Get the mode(s) of each element along the selected axis.</p>
<p>The mode of a set of values is the value that appears most often.
It can be multiple values.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – <p>The axis to iterate over while searching for the mode:</p>
<ul>
<li>0 or ‘index’ : get mode of each column</li>
<li>1 or ‘columns’ : get mode of each row.</li>
</ul>
</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, only apply to numeric columns.</li>
<li><strong>dropna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Don’t consider counts of NaN/NaT.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The modes of each column or row.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>mode with axis="columns" is not implemented because it produces
non-deferred columns.</p>
<p>mode with axis="index" is not currently parallelizable. An approximate,
parallelizable implementation of mode may be added in the future
(<a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-12181">BEAM-12181</a>).</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.mode" title="apache_beam.dataframe.frames.DeferredSeries.mode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.mode()</span></code></a></dt>
<dd>Return the highest frequency value in a DeferredSeries.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.value_counts" title="apache_beam.dataframe.frames.DeferredSeries.value_counts"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.value_counts()</span></code></a></dt>
<dd>Return the counts of values in a DeferredSeries.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([(</span><span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;arthropod&#39;</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">0</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;horse&#39;</span><span class="p">,</span> <span class="s1">&#39;spider&#39;</span><span class="p">,</span> <span class="s1">&#39;ostrich&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;species&#39;</span><span class="p">,</span> <span class="s1">&#39;legs&#39;</span><span class="p">,</span> <span class="s1">&#39;wings&#39;</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> species legs wings</span>
<span class="go">falcon bird 2 2.0</span>
<span class="go">horse mammal 4 NaN</span>
<span class="go">spider arthropod 8 0.0</span>
<span class="go">ostrich bird 2 NaN</span>
<span class="go">By default, missing values are not considered, and the modes of wings</span>
<span class="go">are both 0 and 2. Because the resulting DataFrame has two rows,</span>
<span class="go">the second row of ``species`` and ``legs`` contains ``NaN``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mode</span><span class="p">()</span>
<span class="go"> species legs wings</span>
<span class="go">0 bird 2.0 0.0</span>
<span class="go">1 NaN NaN 2.0</span>
<span class="go">Setting ``dropna=False``, ``NaN`` values are considered and they can be</span>
<span class="go">the mode (like for wings).</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">dropna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go"> species legs wings</span>
<span class="go">0 bird 2 NaN</span>
<span class="go">Setting ``numeric_only=True``, only the mode of numeric columns is</span>
<span class="go">computed, and columns of other types are ignored.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">numeric_only</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> legs wings</span>
<span class="go">0 2.0 0.0</span>
<span class="go">1 NaN 2.0</span>
<span class="go">To compute the mode over columns and not rows, use the axis parameter:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mode</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">,</span> <span class="n">numeric_only</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> 0 1</span>
<span class="go">falcon 2.0 NaN</span>
<span class="go">horse 4.0 NaN</span>
<span class="go">spider 0.0 8.0</span>
<span class="go">ostrich 2.0 NaN</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.dropna">
<code class="descname">dropna</code><span class="sig-paren">(</span><em>axis</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.dropna"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.dropna" title="Permalink to this definition"></a></dt>
<dd><p>Remove missing values.</p>
<p>See the <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/missing_data.html#missing-data" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">User Guide</span></a> for more on which values are
considered missing, and how to work with missing data.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – <p>Determine if rows or columns which contain missing values are
removed.</p>
<ul>
<li>0, or ‘index’ : Drop rows which contain missing values.</li>
<li>1, or ‘columns’ : Drop columns which contain missing values.</li>
</ul>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.0.0: </span>Passing a tuple or list to drop on multiple axes is no longer supported.
Only a single axis is allowed.</p>
</div>
</li>
<li><strong>how</strong> (<em>{'any'</em><em>, </em><em>'all'}</em><em>, </em><em>default 'any'</em>) – <p>Determine if row or column is removed from DeferredDataFrame, when we have
at least one NA or all NA.</p>
<ul>
<li>‘any’ : If any NA values are present, drop that row or column.</li>
<li>‘all’ : If all values are NA, drop that row or column.</li>
</ul>
</li>
<li><strong>thresh</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – Require that many non-NA values.</li>
<li><strong>subset</strong> (<em>array-like</em><em>, </em><em>optional</em>) – Labels along other axis to consider, e.g. if you are dropping rows
these would be a list of columns to include.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, do operation inplace and return None.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredDataFrame with NA entries dropped from it or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>dropna with axis="columns" specified cannot be parallelized.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.isna" title="apache_beam.dataframe.frames.DeferredDataFrame.isna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.isna()</span></code></a></dt>
<dd>Indicate missing values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.notna" title="apache_beam.dataframe.frames.DeferredDataFrame.notna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.notna()</span></code></a></dt>
<dd>Indicate existing (non-missing) values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.fillna" title="apache_beam.dataframe.frames.DeferredDataFrame.fillna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.fillna()</span></code></a></dt>
<dd>Replace missing values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.dropna" title="apache_beam.dataframe.frames.DeferredSeries.dropna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.dropna()</span></code></a></dt>
<dd>Drop missing values.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">Index.dropna()</span></code></dt>
<dd>Drop missing indices.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;Alfred&#39;</span><span class="p">,</span> <span class="s1">&#39;Batman&#39;</span><span class="p">,</span> <span class="s1">&#39;Catwoman&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;toy&quot;</span><span class="p">:</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="s1">&#39;Batmobile&#39;</span><span class="p">,</span> <span class="s1">&#39;Bullwhip&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;born&quot;</span><span class="p">:</span> <span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">NaT</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s2">&quot;1940-04-25&quot;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">pd</span><span class="o">.</span><span class="n">NaT</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> name toy born</span>
<span class="go">0 Alfred NaN NaT</span>
<span class="go">1 Batman Batmobile 1940-04-25</span>
<span class="go">2 Catwoman Bullwhip NaT</span>
<span class="go">Drop the rows where at least one element is missing.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">()</span>
<span class="go"> name toy born</span>
<span class="go">1 Batman Batmobile 1940-04-25</span>
<span class="go">Drop the columns where at least one element is missing.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> name</span>
<span class="go">0 Alfred</span>
<span class="go">1 Batman</span>
<span class="go">2 Catwoman</span>
<span class="go">Drop the rows where all elements are missing.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">how</span><span class="o">=</span><span class="s1">&#39;all&#39;</span><span class="p">)</span>
<span class="go"> name toy born</span>
<span class="go">0 Alfred NaN NaT</span>
<span class="go">1 Batman Batmobile 1940-04-25</span>
<span class="go">2 Catwoman Bullwhip NaT</span>
<span class="go">Keep only the rows with at least 2 non-NA values.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">thresh</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
<span class="go"> name toy born</span>
<span class="go">1 Batman Batmobile 1940-04-25</span>
<span class="go">2 Catwoman Bullwhip NaT</span>
<span class="go">Define in which columns to look for missing values.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">subset</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;toy&#39;</span><span class="p">])</span>
<span class="go"> name toy born</span>
<span class="go">1 Batman Batmobile 1940-04-25</span>
<span class="go">2 Catwoman Bullwhip NaT</span>
<span class="go">Keep the DataFrame with valid entries in the same variable.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> name toy born</span>
<span class="go">1 Batman Batmobile 1940-04-25</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.eval">
<code class="descname">eval</code><span class="sig-paren">(</span><em>expr</em>, <em>inplace</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.eval"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.eval" title="Permalink to this definition"></a></dt>
<dd><p>Evaluate a string describing operations on DataFrame columns.</p>
<p>Operates on columns only, not specific rows or elements. This allows
<cite>eval</cite> to run arbitrary code, which can make you vulnerable to code
injection if you pass user input to this function.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>expr</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – The expression string to evaluate.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If the expression contains an assignment, whether to perform the
operation inplace and mutate the existing DeferredDataFrame. Otherwise,
a new DeferredDataFrame is returned.</li>
<li><strong>**kwargs</strong> – See the documentation for <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eval" title="apache_beam.dataframe.frames.DeferredDataFrame.eval"><code class="xref py py-func docutils literal notranslate"><span class="pre">eval()</span></code></a> for complete details
on the keyword arguments accepted by
<code class="xref py py-meth docutils literal notranslate"><span class="pre">query()</span></code>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The result of the evaluation or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">ndarray, scalar, pandas object, or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Accessing local variables with <code class="docutils literal notranslate"><span class="pre">&#64;&lt;varname&gt;</span></code> is not yet supported
(<a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-11202">BEAM-11202</a>).</p>
<p>Arguments <code class="docutils literal notranslate"><span class="pre">local_dict</span></code>, <code class="docutils literal notranslate"><span class="pre">global_dict</span></code>, <code class="docutils literal notranslate"><span class="pre">level</span></code>, <code class="docutils literal notranslate"><span class="pre">target</span></code>, and
<code class="docutils literal notranslate"><span class="pre">resolvers</span></code> are not yet supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.query" title="apache_beam.dataframe.frames.DeferredDataFrame.query"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.query()</span></code></a></dt>
<dd>Evaluates a boolean expression to query the columns of a frame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.assign" title="apache_beam.dataframe.frames.DeferredDataFrame.assign"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.assign()</span></code></a></dt>
<dd>Can evaluate an expression or function to create new values for a column.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eval" title="apache_beam.dataframe.frames.DeferredDataFrame.eval"><code class="xref py py-meth docutils literal notranslate"><span class="pre">eval()</span></code></a></dt>
<dd>Evaluate a Python expression as a string using various backends.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>For more details see the API documentation for <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eval" title="apache_beam.dataframe.frames.DeferredDataFrame.eval"><code class="xref py py-func docutils literal notranslate"><span class="pre">eval()</span></code></a>.
For detailed examples see <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/enhancingperf.html#enhancingperf-eval" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">enhancing performance with eval</span></a>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">6</span><span class="p">),</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="o">-</span><span class="mi">2</span><span class="p">)})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">0 1 10</span>
<span class="go">1 2 8</span>
<span class="go">2 3 6</span>
<span class="go">3 4 4</span>
<span class="go">4 5 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eval</span><span class="p">(</span><span class="s1">&#39;A + B&#39;</span><span class="p">)</span>
<span class="go">0 11</span>
<span class="go">1 10</span>
<span class="go">2 9</span>
<span class="go">3 8</span>
<span class="go">4 7</span>
<span class="go">dtype: int64</span>
<span class="go">Assignment is allowed though by default the original DataFrame is not</span>
<span class="go">modified.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eval</span><span class="p">(</span><span class="s1">&#39;C = A + B&#39;</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 1 10 11</span>
<span class="go">1 2 8 10</span>
<span class="go">2 3 6 9</span>
<span class="go">3 4 4 8</span>
<span class="go">4 5 2 7</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">0 1 10</span>
<span class="go">1 2 8</span>
<span class="go">2 3 6</span>
<span class="go">3 4 4</span>
<span class="go">4 5 2</span>
<span class="go">Use ``inplace=True`` to modify the original DataFrame.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eval</span><span class="p">(</span><span class="s1">&#39;C = A + B&#39;</span><span class="p">,</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B C</span>
<span class="go">0 1 10 11</span>
<span class="go">1 2 8 10</span>
<span class="go">2 3 6 9</span>
<span class="go">3 4 4 8</span>
<span class="go">4 5 2 7</span>
<span class="go">Multiple columns can be assigned to using multi-line expressions:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eval</span><span class="p">(</span>
<span class="gp">... </span> <span class="sd">&#39;&#39;&#39;</span>
<span class="gp">... </span><span class="sd">C = A + B</span>
<span class="gp">... </span><span class="sd">D = A - B</span>
<span class="gp">... </span><span class="sd">&#39;&#39;&#39;</span>
<span class="gp">... </span><span class="p">)</span>
<span class="go"> A B C D</span>
<span class="go">0 1 10 11 -9</span>
<span class="go">1 2 8 10 -6</span>
<span class="go">2 3 6 9 -3</span>
<span class="go">3 4 4 8 0</span>
<span class="go">4 5 2 7 3</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.query">
<code class="descname">query</code><span class="sig-paren">(</span><em>expr</em>, <em>inplace</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.query"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.query" title="Permalink to this definition"></a></dt>
<dd><p>Query the columns of a DataFrame with a boolean expression.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>expr</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – <p>The query string to evaluate.</p>
<p>You can refer to variables
in the environment by prefixing them with an ‘&#64;’ character like
<code class="docutils literal notranslate"><span class="pre">&#64;a</span> <span class="pre">+</span> <span class="pre">b</span></code>.</p>
<p>You can refer to column names that are not valid Python variable names
by surrounding them in backticks. Thus, column names containing spaces
or punctuation (besides underscores) or starting with digits must be
surrounded by backticks. (For example, a column named “Area (cm^2)” would
be referenced as <code class="docutils literal notranslate"><span class="pre">`Area</span> <span class="pre">(cm^2)`</span></code>). Column names which are Python keywords
(like “if”, “for”, “import”, etc.) cannot be used.</p>
<p>For example, if one of your columns is called <code class="docutils literal notranslate"><span class="pre">a</span> <span class="pre">a</span></code> and you want
to sum it with <code class="docutils literal notranslate"><span class="pre">b</span></code>, your query should be <code class="docutils literal notranslate"><span class="pre">`a</span> <span class="pre">a`</span> <span class="pre">+</span> <span class="pre">b</span></code>.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 0.25.0: </span>Backtick quoting introduced.</p>
</div>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.0.0: </span>Expanding functionality of backtick quoting for more than only spaces.</p>
</div>
</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a>) – Whether the query should modify the data in place or return
a modified copy.</li>
<li><strong>**kwargs</strong> – See the documentation for <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eval" title="apache_beam.dataframe.frames.DeferredDataFrame.eval"><code class="xref py py-func docutils literal notranslate"><span class="pre">eval()</span></code></a> for complete details
on the keyword arguments accepted by <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.query" title="apache_beam.dataframe.frames.DeferredDataFrame.query"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.query()</span></code></a>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredDataFrame resulting from the provided query expression or
None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Accessing local variables with <code class="docutils literal notranslate"><span class="pre">&#64;&lt;varname&gt;</span></code> is not yet supported
(<a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-11202">BEAM-11202</a>).</p>
<p>Arguments <code class="docutils literal notranslate"><span class="pre">local_dict</span></code>, <code class="docutils literal notranslate"><span class="pre">global_dict</span></code>, <code class="docutils literal notranslate"><span class="pre">level</span></code>, <code class="docutils literal notranslate"><span class="pre">target</span></code>, and
<code class="docutils literal notranslate"><span class="pre">resolvers</span></code> are not yet supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eval" title="apache_beam.dataframe.frames.DeferredDataFrame.eval"><code class="xref py py-meth docutils literal notranslate"><span class="pre">eval()</span></code></a></dt>
<dd>Evaluate a string describing operations on DeferredDataFrame columns.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eval" title="apache_beam.dataframe.frames.DeferredDataFrame.eval"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.eval()</span></code></a></dt>
<dd>Evaluate a string describing operations on DeferredDataFrame columns.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>The result of the evaluation of this expression is first passed to
<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.loc" title="apache_beam.dataframe.frames.DeferredDataFrame.loc"><code class="xref py py-attr docutils literal notranslate"><span class="pre">DeferredDataFrame.loc</span></code></a> and if that fails because of a
multidimensional key (e.g., a DeferredDataFrame) then the result will be passed
to <code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.__getitem__()</span></code>.</p>
<p>This method uses the top-level <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eval" title="apache_beam.dataframe.frames.DeferredDataFrame.eval"><code class="xref py py-func docutils literal notranslate"><span class="pre">eval()</span></code></a> function to
evaluate the passed query.</p>
<p>The <code class="xref py py-meth docutils literal notranslate"><span class="pre">query()</span></code> method uses a slightly
modified Python syntax by default. For example, the <code class="docutils literal notranslate"><span class="pre">&amp;</span></code> and <code class="docutils literal notranslate"><span class="pre">|</span></code>
(bitwise) operators have the precedence of their boolean cousins,
<a class="reference external" href="https://docs.python.org/3/reference/expressions.html#and" title="(in Python v3.10)"><code class="xref std std-keyword docutils literal notranslate"><span class="pre">and</span></code></a> and <a class="reference external" href="https://docs.python.org/3/reference/expressions.html#or" title="(in Python v3.10)"><code class="xref std std-keyword docutils literal notranslate"><span class="pre">or</span></code></a>. This <em>is</em> syntactically valid Python,
however the semantics are different.</p>
<p>You can change the semantics of the expression by passing the keyword
argument <code class="docutils literal notranslate"><span class="pre">parser='python'</span></code>. This enforces the same semantics as
evaluation in Python space. Likewise, you can pass <code class="docutils literal notranslate"><span class="pre">engine='python'</span></code>
to evaluate an expression using Python itself as a backend. This is not
recommended as it is inefficient compared to using <code class="docutils literal notranslate"><span class="pre">numexpr</span></code> as the
engine.</p>
<p>The <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.index" title="apache_beam.dataframe.frames.DeferredDataFrame.index"><code class="xref py py-attr docutils literal notranslate"><span class="pre">DeferredDataFrame.index</span></code></a> and
<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.columns" title="apache_beam.dataframe.frames.DeferredDataFrame.columns"><code class="xref py py-attr docutils literal notranslate"><span class="pre">DeferredDataFrame.columns</span></code></a> attributes of the
<code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code> instance are placed in the query namespace
by default, which allows you to treat both the index and columns of the
frame as a column in the frame.
The identifier <code class="docutils literal notranslate"><span class="pre">index</span></code> is used for the frame index; you can also
use the name of the index to identify it in a query. Please note that
Python keywords may not be used as identifiers.</p>
<p>For further details and examples see the <code class="docutils literal notranslate"><span class="pre">query</span></code> documentation in
<a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/indexing.html#indexing-query" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">indexing</span></a>.</p>
<p><em>Backtick quoted variables</em></p>
<p>Backtick quoted variables are parsed as literal Python code and
are converted internally to a Python valid identifier.
This can lead to the following problems.</p>
<p>During parsing a number of disallowed characters inside the backtick
quoted string are replaced by strings that are allowed as a Python identifier.
These characters include all operators in Python, the space character, the
question mark, the exclamation mark, the dollar sign, and the euro sign.
For other characters that fall outside the ASCII range (U+0001..U+007F)
and those that are not further specified in PEP 3131,
the query parser will raise an error.
This excludes whitespace other than the space character,
but also the hashtag (as it is used for comments) and the backtick
itself (backtick can also not be escaped).</p>
<p>In a special case, quotes that make a pair around a backtick can
confuse the parser.
For example, <code class="docutils literal notranslate"><span class="pre">`it's`</span> <span class="pre">&gt;</span> <span class="pre">`that's`</span></code> will raise an error,
as it forms a quoted string (<code class="docutils literal notranslate"><span class="pre">'s</span> <span class="pre">&gt;</span> <span class="pre">`that'</span></code>) with a backtick inside.</p>
<p>See also the Python documentation about lexical analysis
(<a class="reference external" href="https://docs.python.org/3/reference/lexical_analysis.html">https://docs.python.org/3/reference/lexical_analysis.html</a>)
in combination with the source code in <code class="xref py py-mod docutils literal notranslate"><span class="pre">pandas.core.computation.parsing</span></code>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">6</span><span class="p">),</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="o">-</span><span class="mi">2</span><span class="p">),</span>
<span class="gp">... </span> <span class="s1">&#39;C C&#39;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="o">-</span><span class="mi">1</span><span class="p">)})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B C C</span>
<span class="go">0 1 10 10</span>
<span class="go">1 2 8 9</span>
<span class="go">2 3 6 8</span>
<span class="go">3 4 4 7</span>
<span class="go">4 5 2 6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s1">&#39;A &gt; B&#39;</span><span class="p">)</span>
<span class="go"> A B C C</span>
<span class="go">4 5 2 6</span>
<span class="go">The previous expression is equivalent to</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">A</span> <span class="o">&gt;</span> <span class="n">df</span><span class="o">.</span><span class="n">B</span><span class="p">]</span>
<span class="go"> A B C C</span>
<span class="go">4 5 2 6</span>
<span class="go">For columns with spaces in their name, you can use backtick quoting.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">query</span><span class="p">(</span><span class="s1">&#39;B == `C C`&#39;</span><span class="p">)</span>
<span class="go"> A B C C</span>
<span class="go">0 1 10 10</span>
<span class="go">The previous expression is equivalent to</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="o">.</span><span class="n">B</span> <span class="o">==</span> <span class="n">df</span><span class="p">[</span><span class="s1">&#39;C C&#39;</span><span class="p">]]</span>
<span class="go"> A B C C</span>
<span class="go">0 1 10 10</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.isnull">
<code class="descname">isnull</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.isnull" title="Permalink to this definition"></a></dt>
<dd><p>Detect missing values.</p>
<p>Return a boolean same-sized object indicating if the values are NA.
NA values, such as None or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.NaN</span></code>, get mapped to True
values.
Everything else gets mapped to False values. Characters such as empty
strings <code class="docutils literal notranslate"><span class="pre">''</span></code> or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.inf</span></code> are not considered NA values
(unless you set <code class="docutils literal notranslate"><span class="pre">pandas.options.mode.use_inf_as_na</span> <span class="pre">=</span> <span class="pre">True</span></code>).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Mask of bool values for each element in DeferredDataFrame that
indicates whether an element is an NA value.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.isnull" title="apache_beam.dataframe.frames.DeferredDataFrame.isnull"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.isnull()</span></code></a></dt>
<dd>Alias of isna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.notna" title="apache_beam.dataframe.frames.DeferredDataFrame.notna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.notna()</span></code></a></dt>
<dd>Boolean inverse of isna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.dropna" title="apache_beam.dataframe.frames.DeferredDataFrame.dropna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.dropna()</span></code></a></dt>
<dd>Omit axes labels with missing values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.isna" title="apache_beam.dataframe.frames.DeferredDataFrame.isna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">isna()</span></code></a></dt>
<dd>Top-level isna.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Show which entries in a DataFrame are NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="nb">dict</span><span class="p">(</span><span class="n">age</span><span class="o">=</span><span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">born</span><span class="o">=</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">NaT</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1939-05-27&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1940-04-25&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">name</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;Alfred&#39;</span><span class="p">,</span> <span class="s1">&#39;Batman&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">toy</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="s1">&#39;Batmobile&#39;</span><span class="p">,</span> <span class="s1">&#39;Joker&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> age born name toy</span>
<span class="go">0 5.0 NaT Alfred None</span>
<span class="go">1 6.0 1939-05-27 Batman Batmobile</span>
<span class="go">2 NaN 1940-04-25 Joker</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">isna</span><span class="p">()</span>
<span class="go"> age born name toy</span>
<span class="go">0 False True False True</span>
<span class="go">1 False False False False</span>
<span class="go">2 True False False False</span>
<span class="go">Show which entries in a Series are NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span>
<span class="go">0 5.0</span>
<span class="go">1 6.0</span>
<span class="go">2 NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">isna</span><span class="p">()</span>
<span class="go">0 False</span>
<span class="go">1 False</span>
<span class="go">2 True</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.isna">
<code class="descname">isna</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.isna" title="Permalink to this definition"></a></dt>
<dd><p>Detect missing values.</p>
<p>Return a boolean same-sized object indicating if the values are NA.
NA values, such as None or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.NaN</span></code>, get mapped to True
values.
Everything else gets mapped to False values. Characters such as empty
strings <code class="docutils literal notranslate"><span class="pre">''</span></code> or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.inf</span></code> are not considered NA values
(unless you set <code class="docutils literal notranslate"><span class="pre">pandas.options.mode.use_inf_as_na</span> <span class="pre">=</span> <span class="pre">True</span></code>).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Mask of bool values for each element in DeferredDataFrame that
indicates whether an element is an NA value.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.isnull" title="apache_beam.dataframe.frames.DeferredDataFrame.isnull"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.isnull()</span></code></a></dt>
<dd>Alias of isna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.notna" title="apache_beam.dataframe.frames.DeferredDataFrame.notna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.notna()</span></code></a></dt>
<dd>Boolean inverse of isna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.dropna" title="apache_beam.dataframe.frames.DeferredDataFrame.dropna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.dropna()</span></code></a></dt>
<dd>Omit axes labels with missing values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.isna" title="apache_beam.dataframe.frames.DeferredDataFrame.isna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">isna()</span></code></a></dt>
<dd>Top-level isna.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Show which entries in a DataFrame are NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="nb">dict</span><span class="p">(</span><span class="n">age</span><span class="o">=</span><span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">born</span><span class="o">=</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">NaT</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1939-05-27&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1940-04-25&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">name</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;Alfred&#39;</span><span class="p">,</span> <span class="s1">&#39;Batman&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">toy</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="s1">&#39;Batmobile&#39;</span><span class="p">,</span> <span class="s1">&#39;Joker&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> age born name toy</span>
<span class="go">0 5.0 NaT Alfred None</span>
<span class="go">1 6.0 1939-05-27 Batman Batmobile</span>
<span class="go">2 NaN 1940-04-25 Joker</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">isna</span><span class="p">()</span>
<span class="go"> age born name toy</span>
<span class="go">0 False True False True</span>
<span class="go">1 False False False False</span>
<span class="go">2 True False False False</span>
<span class="go">Show which entries in a Series are NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span>
<span class="go">0 5.0</span>
<span class="go">1 6.0</span>
<span class="go">2 NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">isna</span><span class="p">()</span>
<span class="go">0 False</span>
<span class="go">1 False</span>
<span class="go">2 True</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.notnull">
<code class="descname">notnull</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.notnull" title="Permalink to this definition"></a></dt>
<dd><p>Detect existing (non-missing) values.</p>
<p>Return a boolean same-sized object indicating if the values are not NA.
Non-missing values get mapped to True. Characters such as empty
strings <code class="docutils literal notranslate"><span class="pre">''</span></code> or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.inf</span></code> are not considered NA values
(unless you set <code class="docutils literal notranslate"><span class="pre">pandas.options.mode.use_inf_as_na</span> <span class="pre">=</span> <span class="pre">True</span></code>).
NA values, such as None or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.NaN</span></code>, get mapped to False
values.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Mask of bool values for each element in DeferredDataFrame that
indicates whether an element is not an NA value.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.notnull" title="apache_beam.dataframe.frames.DeferredDataFrame.notnull"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.notnull()</span></code></a></dt>
<dd>Alias of notna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.isna" title="apache_beam.dataframe.frames.DeferredDataFrame.isna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.isna()</span></code></a></dt>
<dd>Boolean inverse of notna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.dropna" title="apache_beam.dataframe.frames.DeferredDataFrame.dropna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.dropna()</span></code></a></dt>
<dd>Omit axes labels with missing values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.notna" title="apache_beam.dataframe.frames.DeferredDataFrame.notna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">notna()</span></code></a></dt>
<dd>Top-level notna.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Show which entries in a DataFrame are not NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="nb">dict</span><span class="p">(</span><span class="n">age</span><span class="o">=</span><span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">born</span><span class="o">=</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">NaT</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1939-05-27&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1940-04-25&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">name</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;Alfred&#39;</span><span class="p">,</span> <span class="s1">&#39;Batman&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">toy</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="s1">&#39;Batmobile&#39;</span><span class="p">,</span> <span class="s1">&#39;Joker&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> age born name toy</span>
<span class="go">0 5.0 NaT Alfred None</span>
<span class="go">1 6.0 1939-05-27 Batman Batmobile</span>
<span class="go">2 NaN 1940-04-25 Joker</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">notna</span><span class="p">()</span>
<span class="go"> age born name toy</span>
<span class="go">0 True False True False</span>
<span class="go">1 True True True True</span>
<span class="go">2 False True True True</span>
<span class="go">Show which entries in a Series are not NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span>
<span class="go">0 5.0</span>
<span class="go">1 6.0</span>
<span class="go">2 NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">notna</span><span class="p">()</span>
<span class="go">0 True</span>
<span class="go">1 True</span>
<span class="go">2 False</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.notna">
<code class="descname">notna</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.notna" title="Permalink to this definition"></a></dt>
<dd><p>Detect existing (non-missing) values.</p>
<p>Return a boolean same-sized object indicating if the values are not NA.
Non-missing values get mapped to True. Characters such as empty
strings <code class="docutils literal notranslate"><span class="pre">''</span></code> or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.inf</span></code> are not considered NA values
(unless you set <code class="docutils literal notranslate"><span class="pre">pandas.options.mode.use_inf_as_na</span> <span class="pre">=</span> <span class="pre">True</span></code>).
NA values, such as None or <code class="xref py py-attr docutils literal notranslate"><span class="pre">numpy.NaN</span></code>, get mapped to False
values.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Mask of bool values for each element in DeferredDataFrame that
indicates whether an element is not an NA value.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.notnull" title="apache_beam.dataframe.frames.DeferredDataFrame.notnull"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.notnull()</span></code></a></dt>
<dd>Alias of notna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.isna" title="apache_beam.dataframe.frames.DeferredDataFrame.isna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.isna()</span></code></a></dt>
<dd>Boolean inverse of notna.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.dropna" title="apache_beam.dataframe.frames.DeferredDataFrame.dropna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.dropna()</span></code></a></dt>
<dd>Omit axes labels with missing values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.notna" title="apache_beam.dataframe.frames.DeferredDataFrame.notna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">notna()</span></code></a></dt>
<dd>Top-level notna.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Show which entries in a DataFrame are not NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="nb">dict</span><span class="p">(</span><span class="n">age</span><span class="o">=</span><span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">born</span><span class="o">=</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">NaT</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1939-05-27&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;1940-04-25&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">name</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;Alfred&#39;</span><span class="p">,</span> <span class="s1">&#39;Batman&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">toy</span><span class="o">=</span><span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="s1">&#39;Batmobile&#39;</span><span class="p">,</span> <span class="s1">&#39;Joker&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> age born name toy</span>
<span class="go">0 5.0 NaT Alfred None</span>
<span class="go">1 6.0 1939-05-27 Batman Batmobile</span>
<span class="go">2 NaN 1940-04-25 Joker</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">notna</span><span class="p">()</span>
<span class="go"> age born name toy</span>
<span class="go">0 True False True False</span>
<span class="go">1 True True True True</span>
<span class="go">2 False True True True</span>
<span class="go">Show which entries in a Series are not NA.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">NaN</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span>
<span class="go">0 5.0</span>
<span class="go">1 6.0</span>
<span class="go">2 NaN</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">notna</span><span class="p">()</span>
<span class="go">0 True</span>
<span class="go">1 True</span>
<span class="go">2 False</span>
<span class="go">dtype: bool</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.items">
<code class="descname">items</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.items" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.items.html#pandas.DataFrame.items" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.items()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.itertuples">
<code class="descname">itertuples</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.itertuples" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.itertuples.html#pandas.DataFrame.itertuples" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.itertuples()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.iterrows">
<code class="descname">iterrows</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.iterrows" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.iterrows.html#pandas.DataFrame.iterrows" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.iterrows()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.iteritems">
<code class="descname">iteritems</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.iteritems" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.iteritems.html#pandas.DataFrame.iteritems" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.iteritems()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.join">
<code class="descname">join</code><span class="sig-paren">(</span><em>other</em>, <em>on</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.join"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.join" title="Permalink to this definition"></a></dt>
<dd><p>Join columns of another DataFrame.</p>
<p>Join columns with <cite>other</cite> DataFrame either on index or on a key
column. Efficiently join multiple DataFrame objects by index at once by
passing a list.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><em>list of DeferredDataFrame</em>) – Index should be similar to one of the columns in this one. If a
DeferredSeries is passed, its name attribute must be set, and that will be
used as the column name in the resulting joined DeferredDataFrame.</li>
<li><strong>on</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>list of str</em><em>, or </em><em>array-like</em><em>, </em><em>optional</em>) – Column or index level name(s) in the caller to join on the index
in <cite>other</cite>, otherwise joins index-on-index. If multiple
values given, the <cite>other</cite> DeferredDataFrame must have a MultiIndex. Can
pass an array as the join key if it is not already contained in
the calling DeferredDataFrame. Like an Excel VLOOKUP operation.</li>
<li><strong>how</strong> (<em>{'left'</em><em>, </em><em>'right'</em><em>, </em><em>'outer'</em><em>, </em><em>'inner'}</em><em>, </em><em>default 'left'</em>) – <p>How to handle the operation of the two objects.</p>
<ul>
<li>left: use calling frame’s index (or column if on is specified)</li>
<li>right: use <cite>other</cite>’s index.</li>
<li>outer: form union of calling frame’s index (or column if on is
specified) with <cite>other</cite>’s index, and sort it
lexicographically.</li>
<li>inner: form intersection of calling frame’s index (or column if
on is specified) with <cite>other</cite>’s index, preserving the
calling frame’s order.</li>
</ul>
</li>
<li><strong>lsuffix</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default ''</em>) – Suffix to use from left frame’s overlapping columns.</li>
<li><strong>rsuffix</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default ''</em>) – Suffix to use from right frame’s overlapping columns.</li>
<li><strong>sort</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Order result DeferredDataFrame lexicographically by the join key. If False,
the order of the join key depends on the join type (how keyword).</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A dataframe containing columns from both the caller and <cite>other</cite>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.merge" title="apache_beam.dataframe.frames.DeferredDataFrame.merge"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.merge()</span></code></a></dt>
<dd>For column(s)-on-column(s) operations.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Parameters <cite>on</cite>, <cite>lsuffix</cite>, and <cite>rsuffix</cite> are not supported when
passing a list of <cite>DeferredDataFrame</cite> objects.</p>
<p>Support for specifying index levels as the <cite>on</cite> parameter was added
in pandas version 0.23.0.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;key&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;K0&#39;</span><span class="p">,</span> <span class="s1">&#39;K1&#39;</span><span class="p">,</span> <span class="s1">&#39;K2&#39;</span><span class="p">,</span> <span class="s1">&#39;K3&#39;</span><span class="p">,</span> <span class="s1">&#39;K4&#39;</span><span class="p">,</span> <span class="s1">&#39;K5&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;A0&#39;</span><span class="p">,</span> <span class="s1">&#39;A1&#39;</span><span class="p">,</span> <span class="s1">&#39;A2&#39;</span><span class="p">,</span> <span class="s1">&#39;A3&#39;</span><span class="p">,</span> <span class="s1">&#39;A4&#39;</span><span class="p">,</span> <span class="s1">&#39;A5&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> key A</span>
<span class="go">0 K0 A0</span>
<span class="go">1 K1 A1</span>
<span class="go">2 K2 A2</span>
<span class="go">3 K3 A3</span>
<span class="go">4 K4 A4</span>
<span class="go">5 K5 A5</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;key&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;K0&#39;</span><span class="p">,</span> <span class="s1">&#39;K1&#39;</span><span class="p">,</span> <span class="s1">&#39;K2&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;B0&#39;</span><span class="p">,</span> <span class="s1">&#39;B1&#39;</span><span class="p">,</span> <span class="s1">&#39;B2&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> key B</span>
<span class="go">0 K0 B0</span>
<span class="go">1 K1 B1</span>
<span class="go">2 K2 B2</span>
<span class="go">Join DataFrames using their indexes.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">lsuffix</span><span class="o">=</span><span class="s1">&#39;_caller&#39;</span><span class="p">,</span> <span class="n">rsuffix</span><span class="o">=</span><span class="s1">&#39;_other&#39;</span><span class="p">)</span>
<span class="go"> key_caller A key_other B</span>
<span class="go">0 K0 A0 K0 B0</span>
<span class="go">1 K1 A1 K1 B1</span>
<span class="go">2 K2 A2 K2 B2</span>
<span class="go">3 K3 A3 NaN NaN</span>
<span class="go">4 K4 A4 NaN NaN</span>
<span class="go">5 K5 A5 NaN NaN</span>
<span class="go">If we want to join using the key columns, we need to set key to be</span>
<span class="go">the index in both `df` and `other`. The joined DataFrame will have</span>
<span class="go">key as its index.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">set_index</span><span class="p">(</span><span class="s1">&#39;key&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">set_index</span><span class="p">(</span><span class="s1">&#39;key&#39;</span><span class="p">))</span>
<span class="go"> A B</span>
<span class="go">key</span>
<span class="go">K0 A0 B0</span>
<span class="go">K1 A1 B1</span>
<span class="go">K2 A2 B2</span>
<span class="go">K3 A3 NaN</span>
<span class="go">K4 A4 NaN</span>
<span class="go">K5 A5 NaN</span>
<span class="go">Another option to join using the key columns is to use the `on`</span>
<span class="go">parameter. DataFrame.join always uses `other`&#39;s index but we can use</span>
<span class="go">any column in `df`. This method preserves the original DataFrame&#39;s</span>
<span class="go">index in the result.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">other</span><span class="o">.</span><span class="n">set_index</span><span class="p">(</span><span class="s1">&#39;key&#39;</span><span class="p">),</span> <span class="n">on</span><span class="o">=</span><span class="s1">&#39;key&#39;</span><span class="p">)</span>
<span class="go"> key A B</span>
<span class="go">0 K0 A0 B0</span>
<span class="go">1 K1 A1 B1</span>
<span class="go">2 K2 A2 B2</span>
<span class="go">3 K3 A3 NaN</span>
<span class="go">4 K4 A4 NaN</span>
<span class="go">5 K5 A5 NaN</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.merge">
<code class="descname">merge</code><span class="sig-paren">(</span><em>right</em>, <em>on</em>, <em>left_on</em>, <em>right_on</em>, <em>left_index</em>, <em>right_index</em>, <em>suffixes</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.merge"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.merge" title="Permalink to this definition"></a></dt>
<dd><p>Merge DataFrame or named Series objects with a database-style join.</p>
<p>A named Series object is treated as a DataFrame with a single named column.</p>
<p>The join is done on columns or indexes. If joining columns on
columns, the DataFrame indexes <em>will be ignored</em>. Otherwise if joining indexes
on indexes or indexes on a column or columns, the index will be passed on.
When performing a cross merge, no column specifications to merge on are
allowed.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>right</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a><em> or </em><em>named DeferredSeries</em>) – Object to merge with.</li>
<li><strong>how</strong> (<em>{'left'</em><em>, </em><em>'right'</em><em>, </em><em>'outer'</em><em>, </em><em>'inner'</em><em>, </em><em>'cross'}</em><em>, </em><em>default 'inner'</em>) – <p>Type of merge to be performed.</p>
<ul>
<li>left: use only keys from left frame, similar to a SQL left outer join;
preserve key order.</li>
<li>right: use only keys from right frame, similar to a SQL right outer join;
preserve key order.</li>
<li>outer: use union of keys from both frames, similar to a SQL full outer
join; sort keys lexicographically.</li>
<li>inner: use intersection of keys from both frames, similar to a SQL inner
join; preserve the order of the left keys.</li>
<li>cross: creates the cartesian product from both frames, preserves the order
of the left keys.<div class="versionadded">
<p><span class="versionmodified">New in version 1.2.0.</span></p>
</div>
</li>
</ul>
</li>
<li><strong>on</strong> (<em>label</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a>) – Column or index level names to join on. These must be found in both
DeferredDataFrames. If <cite>on</cite> is None and not merging on indexes then this defaults
to the intersection of the columns in both DeferredDataFrames.</li>
<li><strong>left_on</strong> (<em>label</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, or </em><em>array-like</em>) – Column or index level names to join on in the left DeferredDataFrame. Can also
be an array or list of arrays of the length of the left DeferredDataFrame.
These arrays are treated as if they are columns.</li>
<li><strong>right_on</strong> (<em>label</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, or </em><em>array-like</em>) – Column or index level names to join on in the right DeferredDataFrame. Can also
be an array or list of arrays of the length of the right DeferredDataFrame.
These arrays are treated as if they are columns.</li>
<li><strong>left_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Use the index from the left DeferredDataFrame as the join key(s). If it is a
MultiIndex, the number of keys in the other DeferredDataFrame (either the index
or a number of columns) must match the number of levels.</li>
<li><strong>right_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Use the index from the right DeferredDataFrame as the join key. Same caveats as
left_index.</li>
<li><strong>sort</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Sort the join keys lexicographically in the result DeferredDataFrame. If False,
the order of the join keys depends on the join type (how keyword).</li>
<li><strong>suffixes</strong> (<em>list-like</em><em>, </em><em>default is</em><em> (</em><em>&quot;_x&quot;</em><em>, </em><em>&quot;_y&quot;</em><em>)</em>) – A length-2 sequence where each element is optionally a string
indicating the suffix to add to overlapping column names in
<cite>left</cite> and <cite>right</cite> respectively. Pass a value of <cite>None</cite> instead
of a string to indicate that the column name from <cite>left</cite> or
<cite>right</cite> should be left as-is, with no suffix. At least one of the
values must not be None.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – If False, avoid copy if possible.</li>
<li><strong>indicator</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default False</em>) – If True, adds a column to the output DeferredDataFrame called “_merge” with
information on the source of each row. The column can be given a different
name by providing a string argument. The column will have a Categorical
type with the value of “left_only” for observations whose merge key only
appears in the left DeferredDataFrame, “right_only” for observations
whose merge key only appears in the right DeferredDataFrame, and “both”
if the observation’s merge key is found in both DeferredDataFrames.</li>
<li><strong>validate</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – <p>If specified, checks if merge is of specified type.</p>
<ul>
<li>“one_to_one” or “1:1”: check if merge keys are unique in both
left and right datasets.</li>
<li>“one_to_many” or “1:m”: check if merge keys are unique in left
dataset.</li>
<li>“many_to_one” or “m:1”: check if merge keys are unique in right
dataset.</li>
<li>“many_to_many” or “m:m”: allowed, but does not result in checks.</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A DeferredDataFrame of the two merged objects.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>merge is not parallelizable unless <code class="docutils literal notranslate"><span class="pre">left_index</span></code> or <code class="docutils literal notranslate"><span class="pre">right_index</span></code> is
<code class="docutils literal notranslate"><span class="pre">True</span></code>, because it requires generating an entirely new unique index.
See notes on <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.reset_index" title="apache_beam.dataframe.frames.DeferredDataFrame.reset_index"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.reset_index()</span></code></a>. It is recommended to
move the join key for one of your columns to the index to avoid this issue.
For an example see the enrich pipeline in
<code class="xref py py-mod docutils literal notranslate"><span class="pre">apache_beam.examples.dataframe.taxiride</span></code>.</p>
<p><code class="docutils literal notranslate"><span class="pre">how=&quot;cross&quot;</span></code> is not yet supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">merge_ordered()</span></code></dt>
<dd>Merge with optional filling/interpolation.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">merge_asof()</span></code></dt>
<dd>Merge on nearest keys.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.join" title="apache_beam.dataframe.frames.DeferredDataFrame.join"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.join()</span></code></a></dt>
<dd>Similar method using indices.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Support for specifying index levels as the <cite>on</cite>, <cite>left_on</cite>, and
<cite>right_on</cite> parameters was added in pandas version 0.23.0.
Support for merging named DeferredSeries objects was added in pandas version 0.24.0.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;lkey&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="s1">&#39;bar&#39;</span><span class="p">,</span> <span class="s1">&#39;baz&#39;</span><span class="p">,</span> <span class="s1">&#39;foo&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;value&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">5</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;rkey&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="s1">&#39;bar&#39;</span><span class="p">,</span> <span class="s1">&#39;baz&#39;</span><span class="p">,</span> <span class="s1">&#39;foo&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;value&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span>
<span class="go"> lkey value</span>
<span class="go">0 foo 1</span>
<span class="go">1 bar 2</span>
<span class="go">2 baz 3</span>
<span class="go">3 foo 5</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span>
<span class="go"> rkey value</span>
<span class="go">0 foo 5</span>
<span class="go">1 bar 6</span>
<span class="go">2 baz 7</span>
<span class="go">3 foo 8</span>
<span class="go">Merge df1 and df2 on the lkey and rkey columns. The value columns have</span>
<span class="go">the default suffixes, _x and _y, appended.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">left_on</span><span class="o">=</span><span class="s1">&#39;lkey&#39;</span><span class="p">,</span> <span class="n">right_on</span><span class="o">=</span><span class="s1">&#39;rkey&#39;</span><span class="p">)</span>
<span class="go"> lkey value_x rkey value_y</span>
<span class="go">0 foo 1 foo 5</span>
<span class="go">1 foo 1 foo 8</span>
<span class="go">2 foo 5 foo 5</span>
<span class="go">3 foo 5 foo 8</span>
<span class="go">4 bar 2 bar 6</span>
<span class="go">5 baz 3 baz 7</span>
<span class="go">Merge DataFrames df1 and df2 with specified left and right suffixes</span>
<span class="go">appended to any overlapping columns.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">left_on</span><span class="o">=</span><span class="s1">&#39;lkey&#39;</span><span class="p">,</span> <span class="n">right_on</span><span class="o">=</span><span class="s1">&#39;rkey&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">suffixes</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;_left&#39;</span><span class="p">,</span> <span class="s1">&#39;_right&#39;</span><span class="p">))</span>
<span class="go"> lkey value_left rkey value_right</span>
<span class="go">0 foo 1 foo 5</span>
<span class="go">1 foo 1 foo 8</span>
<span class="go">2 foo 5 foo 5</span>
<span class="go">3 foo 5 foo 8</span>
<span class="go">4 bar 2 bar 6</span>
<span class="go">5 baz 3 baz 7</span>
<span class="go">Merge DataFrames df1 and df2, but raise an exception if the DataFrames have</span>
<span class="go">any overlapping columns.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">left_on</span><span class="o">=</span><span class="s1">&#39;lkey&#39;</span><span class="p">,</span> <span class="n">right_on</span><span class="o">=</span><span class="s1">&#39;rkey&#39;</span><span class="p">,</span> <span class="n">suffixes</span><span class="o">=</span><span class="p">(</span><span class="kc">False</span><span class="p">,</span> <span class="kc">False</span><span class="p">))</span>
<span class="gt">Traceback (most recent call last):</span>
<span class="c">...</span>
<span class="gr">ValueError</span>: <span class="n">columns overlap but no suffix specified:</span>
<span class="go"> Index([&#39;value&#39;], dtype=&#39;object&#39;)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="s1">&#39;bar&#39;</span><span class="p">],</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="s1">&#39;baz&#39;</span><span class="p">],</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span>
<span class="go"> a b</span>
<span class="go">0 foo 1</span>
<span class="go">1 bar 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span>
<span class="go"> a c</span>
<span class="go">0 foo 3</span>
<span class="go">1 baz 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">how</span><span class="o">=</span><span class="s1">&#39;inner&#39;</span><span class="p">,</span> <span class="n">on</span><span class="o">=</span><span class="s1">&#39;a&#39;</span><span class="p">)</span>
<span class="go"> a b c</span>
<span class="go">0 foo 1 3</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">how</span><span class="o">=</span><span class="s1">&#39;left&#39;</span><span class="p">,</span> <span class="n">on</span><span class="o">=</span><span class="s1">&#39;a&#39;</span><span class="p">)</span>
<span class="go"> a b c</span>
<span class="go">0 foo 1 3.0</span>
<span class="go">1 bar 2 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;left&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="s1">&#39;bar&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;right&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span>
<span class="go"> left</span>
<span class="go">0 foo</span>
<span class="go">1 bar</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span>
<span class="go"> right</span>
<span class="go">0 7</span>
<span class="go">1 8</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">how</span><span class="o">=</span><span class="s1">&#39;cross&#39;</span><span class="p">)</span>
<span class="go"> left right</span>
<span class="go">0 foo 7</span>
<span class="go">1 foo 8</span>
<span class="go">2 bar 7</span>
<span class="go">3 bar 8</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.nlargest">
<code class="descname">nlargest</code><span class="sig-paren">(</span><em>keep</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.nlargest"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.nlargest" title="Permalink to this definition"></a></dt>
<dd><p>Return the first <cite>n</cite> rows ordered by <cite>columns</cite> in descending order.</p>
<p>Return the first <cite>n</cite> rows with the largest values in <cite>columns</cite>, in
descending order. The columns that are not specified are returned as
well, but not used for ordering.</p>
<p>This method is equivalent to
<code class="docutils literal notranslate"><span class="pre">df.sort_values(columns,</span> <span class="pre">ascending=False).head(n)</span></code>, but more
performant.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>n</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – Number of rows to return.</li>
<li><strong>columns</strong> (<em>label</em><em> or </em><em>list of labels</em>) – Column label(s) to order by.</li>
<li><strong>keep</strong> (<em>{'first'</em><em>, </em><em>'last'</em><em>, </em><em>'all'}</em><em>, </em><em>default 'first'</em>) – <p>Where there are duplicate values:</p>
<ul>
<li><cite>first</cite> : prioritize the first occurrence(s)</li>
<li><cite>last</cite> : prioritize the last occurrence(s)</li>
<li><code class="docutils literal notranslate"><span class="pre">all</span></code> : do not drop any duplicates, even if it means
selecting more than <cite>n</cite> items.</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The first <cite>n</cite> rows ordered by the given columns in descending
order.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">keep=False</span></code> and <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> are supported. Other values of
<code class="docutils literal notranslate"><span class="pre">keep</span></code> make this an order-sensitive operation. Note <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> is
a Beam-specific option that guarantees only one duplicate will be kept, but
unlike <code class="docutils literal notranslate"><span class="pre">&quot;first&quot;</span></code> and <code class="docutils literal notranslate"><span class="pre">&quot;last&quot;</span></code> it makes no guarantees about <em>which</em>
duplicate element is kept.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.nsmallest" title="apache_beam.dataframe.frames.DeferredDataFrame.nsmallest"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.nsmallest()</span></code></a></dt>
<dd>Return the first <cite>n</cite> rows ordered by <cite>columns</cite> in ascending order.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sort_values" title="apache_beam.dataframe.frames.DeferredDataFrame.sort_values"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sort_values()</span></code></a></dt>
<dd>Sort DeferredDataFrame by the values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.head" title="apache_beam.dataframe.frames.DeferredDataFrame.head"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.head()</span></code></a></dt>
<dd>Return the first <cite>n</cite> rows without re-ordering.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>This function cannot be used with all column types. For example, when
specifying columns with <cite>object</cite> or <cite>category</cite> dtypes, <code class="docutils literal notranslate"><span class="pre">TypeError</span></code> is
raised.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;population&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">59000000</span><span class="p">,</span> <span class="mi">65000000</span><span class="p">,</span> <span class="mi">434000</span><span class="p">,</span>
<span class="gp">... </span> <span class="mi">434000</span><span class="p">,</span> <span class="mi">434000</span><span class="p">,</span> <span class="mi">337000</span><span class="p">,</span> <span class="mi">11300</span><span class="p">,</span>
<span class="gp">... </span> <span class="mi">11300</span><span class="p">,</span> <span class="mi">11300</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;GDP&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1937894</span><span class="p">,</span> <span class="mi">2583560</span> <span class="p">,</span> <span class="mi">12011</span><span class="p">,</span> <span class="mi">4520</span><span class="p">,</span> <span class="mi">12128</span><span class="p">,</span>
<span class="gp">... </span> <span class="mi">17036</span><span class="p">,</span> <span class="mi">182</span><span class="p">,</span> <span class="mi">38</span><span class="p">,</span> <span class="mi">311</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;alpha-2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;IT&quot;</span><span class="p">,</span> <span class="s2">&quot;FR&quot;</span><span class="p">,</span> <span class="s2">&quot;MT&quot;</span><span class="p">,</span> <span class="s2">&quot;MV&quot;</span><span class="p">,</span> <span class="s2">&quot;BN&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;IS&quot;</span><span class="p">,</span> <span class="s2">&quot;NR&quot;</span><span class="p">,</span> <span class="s2">&quot;TV&quot;</span><span class="p">,</span> <span class="s2">&quot;AI&quot;</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;Italy&quot;</span><span class="p">,</span> <span class="s2">&quot;France&quot;</span><span class="p">,</span> <span class="s2">&quot;Malta&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;Maldives&quot;</span><span class="p">,</span> <span class="s2">&quot;Brunei&quot;</span><span class="p">,</span> <span class="s2">&quot;Iceland&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;Nauru&quot;</span><span class="p">,</span> <span class="s2">&quot;Tuvalu&quot;</span><span class="p">,</span> <span class="s2">&quot;Anguilla&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> population GDP alpha-2</span>
<span class="go">Italy 59000000 1937894 IT</span>
<span class="go">France 65000000 2583560 FR</span>
<span class="go">Malta 434000 12011 MT</span>
<span class="go">Maldives 434000 4520 MV</span>
<span class="go">Brunei 434000 12128 BN</span>
<span class="go">Iceland 337000 17036 IS</span>
<span class="go">Nauru 11300 182 NR</span>
<span class="go">Tuvalu 11300 38 TV</span>
<span class="go">Anguilla 11300 311 AI</span>
<span class="go">In the following example, we will use ``nlargest`` to select the three</span>
<span class="go">rows having the largest values in column &quot;population&quot;.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">nlargest</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s1">&#39;population&#39;</span><span class="p">)</span>
<span class="go"> population GDP alpha-2</span>
<span class="go">France 65000000 2583560 FR</span>
<span class="go">Italy 59000000 1937894 IT</span>
<span class="go">Malta 434000 12011 MT</span>
<span class="go">When using ``keep=&#39;last&#39;``, ties are resolved in reverse order:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">nlargest</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s1">&#39;population&#39;</span><span class="p">,</span> <span class="n">keep</span><span class="o">=</span><span class="s1">&#39;last&#39;</span><span class="p">)</span>
<span class="go"> population GDP alpha-2</span>
<span class="go">France 65000000 2583560 FR</span>
<span class="go">Italy 59000000 1937894 IT</span>
<span class="go">Brunei 434000 12128 BN</span>
<span class="go">When using ``keep=&#39;all&#39;``, all duplicate items are maintained:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">nlargest</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s1">&#39;population&#39;</span><span class="p">,</span> <span class="n">keep</span><span class="o">=</span><span class="s1">&#39;all&#39;</span><span class="p">)</span>
<span class="go"> population GDP alpha-2</span>
<span class="go">France 65000000 2583560 FR</span>
<span class="go">Italy 59000000 1937894 IT</span>
<span class="go">Malta 434000 12011 MT</span>
<span class="go">Maldives 434000 4520 MV</span>
<span class="go">Brunei 434000 12128 BN</span>
<span class="go">To order by the largest values in column &quot;population&quot; and then &quot;GDP&quot;,</span>
<span class="go">we can specify multiple columns like in the next example.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">nlargest</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;population&#39;</span><span class="p">,</span> <span class="s1">&#39;GDP&#39;</span><span class="p">])</span>
<span class="go"> population GDP alpha-2</span>
<span class="go">France 65000000 2583560 FR</span>
<span class="go">Italy 59000000 1937894 IT</span>
<span class="go">Brunei 434000 12128 BN</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.nsmallest">
<code class="descname">nsmallest</code><span class="sig-paren">(</span><em>keep</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.nsmallest"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.nsmallest" title="Permalink to this definition"></a></dt>
<dd><p>Return the first <cite>n</cite> rows ordered by <cite>columns</cite> in ascending order.</p>
<p>Return the first <cite>n</cite> rows with the smallest values in <cite>columns</cite>, in
ascending order. The columns that are not specified are returned as
well, but not used for ordering.</p>
<p>This method is equivalent to
<code class="docutils literal notranslate"><span class="pre">df.sort_values(columns,</span> <span class="pre">ascending=True).head(n)</span></code>, but more
performant.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>n</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – Number of items to retrieve.</li>
<li><strong>columns</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – Column name or names to order by.</li>
<li><strong>keep</strong> (<em>{'first'</em><em>, </em><em>'last'</em><em>, </em><em>'all'}</em><em>, </em><em>default 'first'</em>) – <p>Where there are duplicate values:</p>
<ul>
<li><code class="docutils literal notranslate"><span class="pre">first</span></code> : take the first occurrence.</li>
<li><code class="docutils literal notranslate"><span class="pre">last</span></code> : take the last occurrence.</li>
<li><code class="docutils literal notranslate"><span class="pre">all</span></code> : do not drop any duplicates, even if it means
selecting more than <cite>n</cite> items.</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">keep=False</span></code> and <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> are supported. Other values of
<code class="docutils literal notranslate"><span class="pre">keep</span></code> make this an order-sensitive operation. Note <code class="docutils literal notranslate"><span class="pre">keep=&quot;any&quot;</span></code> is
a Beam-specific option that guarantees only one duplicate will be kept, but
unlike <code class="docutils literal notranslate"><span class="pre">&quot;first&quot;</span></code> and <code class="docutils literal notranslate"><span class="pre">&quot;last&quot;</span></code> it makes no guarantees about <em>which</em>
duplicate element is kept.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.nlargest" title="apache_beam.dataframe.frames.DeferredDataFrame.nlargest"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.nlargest()</span></code></a></dt>
<dd>Return the first <cite>n</cite> rows ordered by <cite>columns</cite> in descending order.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sort_values" title="apache_beam.dataframe.frames.DeferredDataFrame.sort_values"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sort_values()</span></code></a></dt>
<dd>Sort DeferredDataFrame by the values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.head" title="apache_beam.dataframe.frames.DeferredDataFrame.head"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.head()</span></code></a></dt>
<dd>Return the first <cite>n</cite> rows without re-ordering.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;population&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">59000000</span><span class="p">,</span> <span class="mi">65000000</span><span class="p">,</span> <span class="mi">434000</span><span class="p">,</span>
<span class="gp">... </span> <span class="mi">434000</span><span class="p">,</span> <span class="mi">434000</span><span class="p">,</span> <span class="mi">337000</span><span class="p">,</span> <span class="mi">337000</span><span class="p">,</span>
<span class="gp">... </span> <span class="mi">11300</span><span class="p">,</span> <span class="mi">11300</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;GDP&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1937894</span><span class="p">,</span> <span class="mi">2583560</span> <span class="p">,</span> <span class="mi">12011</span><span class="p">,</span> <span class="mi">4520</span><span class="p">,</span> <span class="mi">12128</span><span class="p">,</span>
<span class="gp">... </span> <span class="mi">17036</span><span class="p">,</span> <span class="mi">182</span><span class="p">,</span> <span class="mi">38</span><span class="p">,</span> <span class="mi">311</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;alpha-2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;IT&quot;</span><span class="p">,</span> <span class="s2">&quot;FR&quot;</span><span class="p">,</span> <span class="s2">&quot;MT&quot;</span><span class="p">,</span> <span class="s2">&quot;MV&quot;</span><span class="p">,</span> <span class="s2">&quot;BN&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;IS&quot;</span><span class="p">,</span> <span class="s2">&quot;NR&quot;</span><span class="p">,</span> <span class="s2">&quot;TV&quot;</span><span class="p">,</span> <span class="s2">&quot;AI&quot;</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;Italy&quot;</span><span class="p">,</span> <span class="s2">&quot;France&quot;</span><span class="p">,</span> <span class="s2">&quot;Malta&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;Maldives&quot;</span><span class="p">,</span> <span class="s2">&quot;Brunei&quot;</span><span class="p">,</span> <span class="s2">&quot;Iceland&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;Nauru&quot;</span><span class="p">,</span> <span class="s2">&quot;Tuvalu&quot;</span><span class="p">,</span> <span class="s2">&quot;Anguilla&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> population GDP alpha-2</span>
<span class="go">Italy 59000000 1937894 IT</span>
<span class="go">France 65000000 2583560 FR</span>
<span class="go">Malta 434000 12011 MT</span>
<span class="go">Maldives 434000 4520 MV</span>
<span class="go">Brunei 434000 12128 BN</span>
<span class="go">Iceland 337000 17036 IS</span>
<span class="go">Nauru 337000 182 NR</span>
<span class="go">Tuvalu 11300 38 TV</span>
<span class="go">Anguilla 11300 311 AI</span>
<span class="go">In the following example, we will use ``nsmallest`` to select the</span>
<span class="go">three rows having the smallest values in column &quot;population&quot;.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">nsmallest</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s1">&#39;population&#39;</span><span class="p">)</span>
<span class="go"> population GDP alpha-2</span>
<span class="go">Tuvalu 11300 38 TV</span>
<span class="go">Anguilla 11300 311 AI</span>
<span class="go">Iceland 337000 17036 IS</span>
<span class="go">When using ``keep=&#39;last&#39;``, ties are resolved in reverse order:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">nsmallest</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s1">&#39;population&#39;</span><span class="p">,</span> <span class="n">keep</span><span class="o">=</span><span class="s1">&#39;last&#39;</span><span class="p">)</span>
<span class="go"> population GDP alpha-2</span>
<span class="go">Anguilla 11300 311 AI</span>
<span class="go">Tuvalu 11300 38 TV</span>
<span class="go">Nauru 337000 182 NR</span>
<span class="go">When using ``keep=&#39;all&#39;``, all duplicate items are maintained:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">nsmallest</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s1">&#39;population&#39;</span><span class="p">,</span> <span class="n">keep</span><span class="o">=</span><span class="s1">&#39;all&#39;</span><span class="p">)</span>
<span class="go"> population GDP alpha-2</span>
<span class="go">Tuvalu 11300 38 TV</span>
<span class="go">Anguilla 11300 311 AI</span>
<span class="go">Iceland 337000 17036 IS</span>
<span class="go">Nauru 337000 182 NR</span>
<span class="go">To order by the smallest values in column &quot;population&quot; and then &quot;GDP&quot;, we can</span>
<span class="go">specify multiple columns like in the next example.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">nsmallest</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;population&#39;</span><span class="p">,</span> <span class="s1">&#39;GDP&#39;</span><span class="p">])</span>
<span class="go"> population GDP alpha-2</span>
<span class="go">Tuvalu 11300 38 TV</span>
<span class="go">Anguilla 11300 311 AI</span>
<span class="go">Nauru 337000 182 NR</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.plot">
<code class="descname">plot</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.plot" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.plot.html#pandas.DataFrame.plot" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.plot()</span></code></a> is not yet supported in the Beam DataFrame API because it is a plotting tool.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-plotting-tools">https://s.apache.org/dataframe-plotting-tools</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.pop">
<code class="descname">pop</code><span class="sig-paren">(</span><em>item</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.pop"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.pop" title="Permalink to this definition"></a></dt>
<dd><p>Return item and drop from frame. Raise KeyError if not found.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>item</strong> (<em>label</em>) – Label of column to be popped.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"></td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([(</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="mf">389.0</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;parrot&#39;</span><span class="p">,</span> <span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="mf">24.0</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;lion&#39;</span><span class="p">,</span> <span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="mf">80.5</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;monkey&#39;</span><span class="p">,</span> <span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;name&#39;</span><span class="p">,</span> <span class="s1">&#39;class&#39;</span><span class="p">,</span> <span class="s1">&#39;max_speed&#39;</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> name class max_speed</span>
<span class="go">0 falcon bird 389.0</span>
<span class="go">1 parrot bird 24.0</span>
<span class="go">2 lion mammal 80.5</span>
<span class="go">3 monkey mammal NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s1">&#39;class&#39;</span><span class="p">)</span>
<span class="go">0 bird</span>
<span class="go">1 bird</span>
<span class="go">2 mammal</span>
<span class="go">3 mammal</span>
<span class="go">Name: class, dtype: object</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> name max_speed</span>
<span class="go">0 falcon 389.0</span>
<span class="go">1 parrot 24.0</span>
<span class="go">2 lion 80.5</span>
<span class="go">3 monkey NaN</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.quantile">
<code class="descname">quantile</code><span class="sig-paren">(</span><em>q</em>, <em>axis</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.quantile"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.quantile" title="Permalink to this definition"></a></dt>
<dd><p>Return values at the given quantile over requested axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>q</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><em>array-like</em><em>, </em><em>default 0.5</em><em> (</em><em>50% quantile</em><em>)</em>) – Value between 0 &lt;= q &lt;= 1, the quantile(s) to compute.</li>
<li><strong>axis</strong> (<em>{0</em><em>, </em><em>1</em><em>, </em><em>'index'</em><em>, </em><em>'columns'}</em><em>, </em><em>default 0</em>) – Equals 0 or ‘index’ for row-wise, 1 or ‘columns’ for column-wise.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – If False, the quantile of datetime and timedelta data will be
computed as well.</li>
<li><strong>interpolation</strong> (<em>{'linear'</em><em>, </em><em>'lower'</em><em>, </em><em>'higher'</em><em>, </em><em>'midpoint'</em><em>, </em><em>'nearest'}</em>) – <p>This optional parameter specifies the interpolation method to use,
when the desired quantile lies between two data points <cite>i</cite> and <cite>j</cite>:</p>
<ul>
<li>linear: <cite>i + (j - i) * fraction</cite>, where <cite>fraction</cite> is the
fractional part of the index surrounded by <cite>i</cite> and <cite>j</cite>.</li>
<li>lower: <cite>i</cite>.</li>
<li>higher: <cite>j</cite>.</li>
<li>nearest: <cite>i</cite> or <cite>j</cite> whichever is nearest.</li>
<li>midpoint: (<cite>i</cite> + <cite>j</cite>) / 2.</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><dl class="first docutils">
<dt>If <code class="docutils literal notranslate"><span class="pre">q</span></code> is an array, a DeferredDataFrame will be returned where the</dt>
<dd><p class="first last">index is <code class="docutils literal notranslate"><span class="pre">q</span></code>, the columns are the columns of self, and the
values are the quantiles.</p>
</dd>
<dt>If <code class="docutils literal notranslate"><span class="pre">q</span></code> is a float, a DeferredSeries will be returned where the</dt>
<dd><p class="first last">index is the columns of self and the values are the quantiles.</p>
</dd>
</dl>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">quantile(axis=&quot;index&quot;)</span></code> is not parallelizable. See
<a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-12167">BEAM-12167</a> tracking
the possible addition of an approximate, parallelizable implementation of
quantile.</p>
<p>When using quantile with <code class="docutils literal notranslate"><span class="pre">axis=&quot;columns&quot;</span></code> only a single <code class="docutils literal notranslate"><span class="pre">q</span></code> value can be
specified.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">core.window.Rolling.quantile()</span></code></dt>
<dd>Rolling quantile.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.percentile()</span></code></dt>
<dd>Numpy function to compute the percentile.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">10</span><span class="p">],</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">100</span><span class="p">]]),</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="mf">.1</span><span class="p">)</span>
<span class="go">a 1.3</span>
<span class="go">b 3.7</span>
<span class="go">Name: 0.1, dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">quantile</span><span class="p">([</span><span class="mf">.1</span><span class="p">,</span> <span class="mf">.5</span><span class="p">])</span>
<span class="go"> a b</span>
<span class="go">0.1 1.3 3.7</span>
<span class="go">0.5 2.5 55.0</span>
<span class="go">Specifying `numeric_only=False` will also compute the quantile of</span>
<span class="go">datetime and timedelta data.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;2010&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;2011&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">Timedelta</span><span class="p">(</span><span class="s1">&#39;1 days&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">pd</span><span class="o">.</span><span class="n">Timedelta</span><span class="p">(</span><span class="s1">&#39;2 days&#39;</span><span class="p">)]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">quantile</span><span class="p">(</span><span class="mf">0.5</span><span class="p">,</span> <span class="n">numeric_only</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">A 1.5</span>
<span class="go">B 2010-07-02 12:00:00</span>
<span class="go">C 1 days 12:00:00</span>
<span class="go">Name: 0.5, dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.rename">
<code class="descname">rename</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.rename"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.rename" title="Permalink to this definition"></a></dt>
<dd><p>Alter axes labels.</p>
<p>Function / dict values must be unique (1-to-1). Labels not contained in
a dict / Series will be left as-is. Extra labels listed don’t throw an
error.</p>
<p>See the <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/basics.html#basics-rename" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">user guide</span></a> for more.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>mapper</strong> (<em>dict-like</em><em> or </em><em>function</em>) – Dict-like or function transformations to apply to
that axis’ values. Use either <code class="docutils literal notranslate"><span class="pre">mapper</span></code> and <code class="docutils literal notranslate"><span class="pre">axis</span></code> to
specify the axis to target with <code class="docutils literal notranslate"><span class="pre">mapper</span></code>, or <code class="docutils literal notranslate"><span class="pre">index</span></code> and
<code class="docutils literal notranslate"><span class="pre">columns</span></code>.</li>
<li><strong>index</strong> (<em>dict-like</em><em> or </em><em>function</em>) – Alternative to specifying axis (<code class="docutils literal notranslate"><span class="pre">mapper,</span> <span class="pre">axis=0</span></code>
is equivalent to <code class="docutils literal notranslate"><span class="pre">index=mapper</span></code>).</li>
<li><strong>columns</strong> (<em>dict-like</em><em> or </em><em>function</em>) – Alternative to specifying axis (<code class="docutils literal notranslate"><span class="pre">mapper,</span> <span class="pre">axis=1</span></code>
is equivalent to <code class="docutils literal notranslate"><span class="pre">columns=mapper</span></code>).</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – Axis to target with <code class="docutils literal notranslate"><span class="pre">mapper</span></code>. Can be either the axis name
(‘index’, ‘columns’) or number (0, 1). The default is ‘index’.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Also copy underlying data.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Whether to modify the DeferredDataFrame in place rather than returning a new one. If True, the value of <code class="docutils literal notranslate"><span class="pre">copy</span></code> is
ignored.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – In case of a MultiIndex, only rename labels in the specified
level.</li>
<li><strong>errors</strong> (<em>{'ignore'</em><em>, </em><em>'raise'}</em><em>, </em><em>default 'ignore'</em>) – If ‘raise’, raise a <cite>KeyError</cite> when a dict-like <cite>mapper</cite>, <cite>index</cite>,
or <cite>columns</cite> contains labels that are not present in the Index
being transformed.
If ‘ignore’, existing keys will be renamed and extra keys will be
ignored.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredDataFrame with the renamed axis labels or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#KeyError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">KeyError</span></code></a> – If any of the labels is not found in the selected axis and
<code class="docutils literal notranslate"><span class="pre">errors=&quot;raise&quot;</span></code>.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>rename is not parallelizable when <code class="docutils literal notranslate"><span class="pre">axis=&quot;index&quot;</span></code> and
<code class="docutils literal notranslate"><span class="pre">errors=&quot;raise&quot;</span></code>. It requires collecting all data on a single
node in order to detect if one of the index values is missing.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.rename_axis" title="apache_beam.dataframe.frames.DeferredDataFrame.rename_axis"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.rename_axis()</span></code></a></dt>
<dd>Set the name of the axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">``DataFrame.rename`` supports two calling conventions</span>
<span class="go">* ``(index=index_mapper, columns=columns_mapper, ...)``</span>
<span class="go">* ``(mapper, axis={&#39;index&#39;, &#39;columns&#39;}, ...)``</span>
<span class="go">We *highly* recommend using keyword arguments to clarify your</span>
<span class="go">intent.</span>
<span class="go">Rename columns using a mapping:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;A&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s2">&quot;B&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="p">{</span><span class="s2">&quot;A&quot;</span><span class="p">:</span> <span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;B&quot;</span><span class="p">:</span> <span class="s2">&quot;c&quot;</span><span class="p">})</span>
<span class="go"> a c</span>
<span class="go">0 1 4</span>
<span class="go">1 2 5</span>
<span class="go">2 3 6</span>
<span class="go">Rename index using a mapping:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="s2">&quot;x&quot;</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s2">&quot;y&quot;</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s2">&quot;z&quot;</span><span class="p">})</span>
<span class="go"> A B</span>
<span class="go">x 1 4</span>
<span class="go">y 2 5</span>
<span class="go">z 3 6</span>
<span class="go">Cast index labels to a different type:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">index</span>
<span class="go">RangeIndex(start=0, stop=3, step=1)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="nb">str</span><span class="p">)</span><span class="o">.</span><span class="n">index</span>
<span class="go">Index([&#39;0&#39;, &#39;1&#39;, &#39;2&#39;], dtype=&#39;object&#39;)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="p">{</span><span class="s2">&quot;A&quot;</span><span class="p">:</span> <span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;B&quot;</span><span class="p">:</span> <span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="s2">&quot;C&quot;</span><span class="p">:</span> <span class="s2">&quot;c&quot;</span><span class="p">},</span> <span class="n">errors</span><span class="o">=</span><span class="s2">&quot;raise&quot;</span><span class="p">)</span>
<span class="gt">Traceback (most recent call last):</span>
<span class="gr">KeyError</span>: <span class="n">[&#39;C&#39;] not found in axis</span>
<span class="go">Using axis-style parameters:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rename</span><span class="p">(</span><span class="nb">str</span><span class="o">.</span><span class="n">lower</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> a b</span>
<span class="go">0 1 4</span>
<span class="go">1 2 5</span>
<span class="go">2 3 6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rename</span><span class="p">({</span><span class="mi">1</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="mi">4</span><span class="p">},</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 1 4</span>
<span class="go">2 2 5</span>
<span class="go">4 3 6</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.rename_axis">
<code class="descname">rename_axis</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.rename_axis" title="Permalink to this definition"></a></dt>
<dd><p>Set the name of the axis for the index or columns.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>mapper</strong> (<em>scalar</em><em>, </em><em>list-like</em><em>, </em><em>optional</em>) – Value to set the axis name attribute.</li>
<li><strong>index, columns</strong> (<em>scalar</em><em>, </em><em>list-like</em><em>, </em><em>dict-like</em><em> or </em><em>function</em><em>, </em><em>optional</em>) – <p>A scalar, list-like, dict-like or function transformation to
apply to that axis’ values.
Note that the <code class="docutils literal notranslate"><span class="pre">columns</span></code> parameter is not allowed if the
object is a DeferredSeries. This parameter only applies to DeferredDataFrame
type objects.</p>
<p>Use either <code class="docutils literal notranslate"><span class="pre">mapper</span></code> and <code class="docutils literal notranslate"><span class="pre">axis</span></code> to
specify the axis to target with <code class="docutils literal notranslate"><span class="pre">mapper</span></code>, or <code class="docutils literal notranslate"><span class="pre">index</span></code>
and/or <code class="docutils literal notranslate"><span class="pre">columns</span></code>.</p>
</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – The axis to rename.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Also copy underlying data.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Modifies the object directly, instead of creating a new DeferredSeries
or DeferredDataFrame.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The same type as the caller or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a>, <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a>, or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.rename" title="apache_beam.dataframe.frames.DeferredSeries.rename"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.rename()</span></code></a></dt>
<dd>Alter DeferredSeries index labels or name.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.rename" title="apache_beam.dataframe.frames.DeferredDataFrame.rename"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.rename()</span></code></a></dt>
<dd>Alter DeferredDataFrame index labels or name.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">Index.rename()</span></code></dt>
<dd>Set new names on index.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p><code class="docutils literal notranslate"><span class="pre">DeferredDataFrame.rename_axis</span></code> supports two calling conventions</p>
<ul class="simple">
<li><code class="docutils literal notranslate"><span class="pre">(index=index_mapper,</span> <span class="pre">columns=columns_mapper,</span> <span class="pre">...)</span></code></li>
<li><code class="docutils literal notranslate"><span class="pre">(mapper,</span> <span class="pre">axis={'index',</span> <span class="pre">'columns'},</span> <span class="pre">...)</span></code></li>
</ul>
<p>The first calling convention will only modify the names of
the index and/or the names of the Index object that is the columns.
In this case, the parameter <code class="docutils literal notranslate"><span class="pre">copy</span></code> is ignored.</p>
<p>The second calling convention will modify the names of the
corresponding index if mapper is a list or a scalar.
However, if mapper is dict-like or a function, it will use the
deprecated behavior of modifying the axis <em>labels</em>.</p>
<p>We <em>highly</em> recommend using keyword arguments to clarify your
intent.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">**Series**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s2">&quot;dog&quot;</span><span class="p">,</span> <span class="s2">&quot;cat&quot;</span><span class="p">,</span> <span class="s2">&quot;monkey&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 dog</span>
<span class="go">1 cat</span>
<span class="go">2 monkey</span>
<span class="go">dtype: object</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">(</span><span class="s2">&quot;animal&quot;</span><span class="p">)</span>
<span class="go">animal</span>
<span class="go">0 dog</span>
<span class="go">1 cat</span>
<span class="go">2 monkey</span>
<span class="go">dtype: object</span>
<span class="go">**DataFrame**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;num_legs&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;num_arms&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">]},</span>
<span class="gp">... </span> <span class="p">[</span><span class="s2">&quot;dog&quot;</span><span class="p">,</span> <span class="s2">&quot;cat&quot;</span><span class="p">,</span> <span class="s2">&quot;monkey&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> num_legs num_arms</span>
<span class="go">dog 4 0</span>
<span class="go">cat 4 0</span>
<span class="go">monkey 2 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">(</span><span class="s2">&quot;animal&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> num_legs num_arms</span>
<span class="go">animal</span>
<span class="go">dog 4 0</span>
<span class="go">cat 4 0</span>
<span class="go">monkey 2 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">(</span><span class="s2">&quot;limbs&quot;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="s2">&quot;columns&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go">limbs num_legs num_arms</span>
<span class="go">animal</span>
<span class="go">dog 4 0</span>
<span class="go">cat 4 0</span>
<span class="go">monkey 2 2</span>
<span class="go">**MultiIndex**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">index</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_product</span><span class="p">([[</span><span class="s1">&#39;mammal&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="s1">&#39;cat&#39;</span><span class="p">,</span> <span class="s1">&#39;monkey&#39;</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;type&#39;</span><span class="p">,</span> <span class="s1">&#39;name&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go">limbs num_legs num_arms</span>
<span class="go">type name</span>
<span class="go">mammal dog 4 0</span>
<span class="go"> cat 4 0</span>
<span class="go"> monkey 2 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;type&#39;</span><span class="p">:</span> <span class="s1">&#39;class&#39;</span><span class="p">})</span>
<span class="go">limbs num_legs num_arms</span>
<span class="go">class name</span>
<span class="go">mammal dog 4 0</span>
<span class="go"> cat 4 0</span>
<span class="go"> monkey 2 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="nb">str</span><span class="o">.</span><span class="n">upper</span><span class="p">)</span>
<span class="go">LIMBS num_legs num_arms</span>
<span class="go">type name</span>
<span class="go">mammal dog 4 0</span>
<span class="go"> cat 4 0</span>
<span class="go"> monkey 2 2</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.round">
<code class="descname">round</code><span class="sig-paren">(</span><em>decimals</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.round"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.round" title="Permalink to this definition"></a></dt>
<dd><p>Round a DataFrame to a variable number of decimal places.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>decimals</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a>) – Number of decimal places to round each column to. If an int is
given, round each column to the same number of places.
Otherwise dict and DeferredSeries round to variable numbers of places.
Column names should be in the keys if <cite>decimals</cite> is a
dict-like, or in the index if <cite>decimals</cite> is a DeferredSeries. Any
columns not included in <cite>decimals</cite> will be left as is. Elements
of <cite>decimals</cite> which are not columns of the input will be
ignored.</li>
<li><strong>*args</strong> – Additional keywords have no effect but might be accepted for
compatibility with numpy.</li>
<li><strong>**kwargs</strong> – Additional keywords have no effect but might be accepted for
compatibility with numpy.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A DeferredDataFrame with the affected columns rounded to the specified
number of decimal places.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.around()</span></code></dt>
<dd>Round a numpy array to the given number of decimals.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.round" title="apache_beam.dataframe.frames.DeferredSeries.round"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.round()</span></code></a></dt>
<dd>Round a DeferredSeries to the given number of decimals.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([(</span><span class="mf">.21</span><span class="p">,</span> <span class="mf">.32</span><span class="p">),</span> <span class="p">(</span><span class="mf">.01</span><span class="p">,</span> <span class="mf">.67</span><span class="p">),</span> <span class="p">(</span><span class="mf">.66</span><span class="p">,</span> <span class="mf">.03</span><span class="p">),</span> <span class="p">(</span><span class="mf">.21</span><span class="p">,</span> <span class="mf">.18</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;dogs&#39;</span><span class="p">,</span> <span class="s1">&#39;cats&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> dogs cats</span>
<span class="go">0 0.21 0.32</span>
<span class="go">1 0.01 0.67</span>
<span class="go">2 0.66 0.03</span>
<span class="go">3 0.21 0.18</span>
<span class="go">By providing an integer each column is rounded to the same number</span>
<span class="go">of decimal places</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> dogs cats</span>
<span class="go">0 0.2 0.3</span>
<span class="go">1 0.0 0.7</span>
<span class="go">2 0.7 0.0</span>
<span class="go">3 0.2 0.2</span>
<span class="go">With a dict, the number of places for specific columns can be</span>
<span class="go">specified with the column names as key and the number of decimal</span>
<span class="go">places as value</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">round</span><span class="p">({</span><span class="s1">&#39;dogs&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;cats&#39;</span><span class="p">:</span> <span class="mi">0</span><span class="p">})</span>
<span class="go"> dogs cats</span>
<span class="go">0 0.2 0.0</span>
<span class="go">1 0.0 1.0</span>
<span class="go">2 0.7 0.0</span>
<span class="go">3 0.2 0.0</span>
<span class="go">Using a Series, the number of places for specific columns can be</span>
<span class="go">specified with the column names as index and the number of</span>
<span class="go">decimal places as value</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">decimals</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;cats&#39;</span><span class="p">,</span> <span class="s1">&#39;dogs&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">round</span><span class="p">(</span><span class="n">decimals</span><span class="p">)</span>
<span class="go"> dogs cats</span>
<span class="go">0 0.2 0.0</span>
<span class="go">1 0.0 1.0</span>
<span class="go">2 0.7 0.0</span>
<span class="go">3 0.2 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.select_dtypes">
<code class="descname">select_dtypes</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.select_dtypes" title="Permalink to this definition"></a></dt>
<dd><p>Return a subset of the DataFrame’s columns based on the column dtypes.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>include, exclude</strong> (<em>scalar</em><em> or </em><em>list-like</em>) – A selection of dtypes or strings to be included/excluded. At least
one of these parameters must be supplied.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">The subset of the frame including the dtypes in <code class="docutils literal notranslate"><span class="pre">include</span></code> and
excluding the dtypes in <code class="docutils literal notranslate"><span class="pre">exclude</span></code>.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> –<ul class="simple">
<li>If both of <code class="docutils literal notranslate"><span class="pre">include</span></code> and <code class="docutils literal notranslate"><span class="pre">exclude</span></code> are empty.</li>
<li>If <code class="docutils literal notranslate"><span class="pre">include</span></code> and <code class="docutils literal notranslate"><span class="pre">exclude</span></code> have overlapping elements.</li>
<li>If any kind of string dtype is passed in.</li>
</ul></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.dtypes" title="apache_beam.dataframe.frames.DeferredDataFrame.dtypes"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.dtypes()</span></code></a></dt>
<dd>Return DeferredSeries with the data type of each column.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<ul class="simple">
<li>To select all <em>numeric</em> types, use <code class="docutils literal notranslate"><span class="pre">np.number</span></code> or <code class="docutils literal notranslate"><span class="pre">'number'</span></code></li>
<li>To select strings you must use the <code class="docutils literal notranslate"><span class="pre">object</span></code> dtype, but note that
this will return <em>all</em> object dtype columns</li>
<li>See the <a class="reference external" href="https://numpy.org/doc/stable/reference/arrays.scalars.html">numpy dtype hierarchy</a></li>
<li>To select datetimes, use <code class="docutils literal notranslate"><span class="pre">np.datetime64</span></code>, <code class="docutils literal notranslate"><span class="pre">'datetime'</span></code> or
<code class="docutils literal notranslate"><span class="pre">'datetime64'</span></code></li>
<li>To select timedeltas, use <code class="docutils literal notranslate"><span class="pre">np.timedelta64</span></code>, <code class="docutils literal notranslate"><span class="pre">'timedelta'</span></code> or
<code class="docutils literal notranslate"><span class="pre">'timedelta64'</span></code></li>
<li>To select Pandas categorical dtypes, use <code class="docutils literal notranslate"><span class="pre">'category'</span></code></li>
<li>To select Pandas datetimetz dtypes, use <code class="docutils literal notranslate"><span class="pre">'datetimetz'</span></code> (new in
0.20.0) or <code class="docutils literal notranslate"><span class="pre">'datetime64[ns,</span> <span class="pre">tz]'</span></code></li>
</ul>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span> <span class="o">*</span> <span class="mi">3</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">]</span> <span class="o">*</span> <span class="mi">3</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">2.0</span><span class="p">]</span> <span class="o">*</span> <span class="mi">3</span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> a b c</span>
<span class="go">0 1 True 1.0</span>
<span class="go">1 2 False 2.0</span>
<span class="go">2 1 True 1.0</span>
<span class="go">3 2 False 2.0</span>
<span class="go">4 1 True 1.0</span>
<span class="go">5 2 False 2.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select_dtypes</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="s1">&#39;bool&#39;</span><span class="p">)</span>
<span class="go"> b</span>
<span class="go">0 True</span>
<span class="go">1 False</span>
<span class="go">2 True</span>
<span class="go">3 False</span>
<span class="go">4 True</span>
<span class="go">5 False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select_dtypes</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;float64&#39;</span><span class="p">])</span>
<span class="go"> c</span>
<span class="go">0 1.0</span>
<span class="go">1 2.0</span>
<span class="go">2 1.0</span>
<span class="go">3 2.0</span>
<span class="go">4 1.0</span>
<span class="go">5 2.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">select_dtypes</span><span class="p">(</span><span class="n">exclude</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;int64&#39;</span><span class="p">])</span>
<span class="go"> b c</span>
<span class="go">0 True 1.0</span>
<span class="go">1 False 2.0</span>
<span class="go">2 True 1.0</span>
<span class="go">3 False 2.0</span>
<span class="go">4 True 1.0</span>
<span class="go">5 False 2.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.shift">
<code class="descname">shift</code><span class="sig-paren">(</span><em>axis</em>, <em>freq</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.shift"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.shift" title="Permalink to this definition"></a></dt>
<dd><p>Shift index by desired number of periods with an optional time <cite>freq</cite>.</p>
<p>When <cite>freq</cite> is not passed, shift the index without realigning the data.
If <cite>freq</cite> is passed (in this case, the index must be date or datetime,
or it will raise a <cite>NotImplementedError</cite>), the index will be
increased using the periods and the <cite>freq</cite>. <cite>freq</cite> can be inferred
when specified as “infer” as long as either freq or inferred_freq
attribute is set in the index.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>periods</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – Number of periods to shift. Can be positive or negative.</li>
<li><strong>freq</strong> (<em>DateOffset</em><em>, </em><em>tseries.offsets</em><em>, </em><em>timedelta</em><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – Offset to use from the tseries module or time rule (e.g. ‘EOM’).
If <cite>freq</cite> is specified then the index values are shifted but the
data is not realigned. That is, use <cite>freq</cite> if you would like to
extend the index when shifting and preserve the original data.
If <cite>freq</cite> is specified as “infer” then it will be inferred from
the freq or inferred_freq attributes of the index. If neither of
those attributes exist, a ValueError is thrown.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'</em><em>, </em><em>None}</em><em>, </em><em>default None</em>) – Shift direction.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><em>object</em></a><em>, </em><em>optional</em>) – <p>The scalar value to use for newly introduced missing values.
The default depends on the dtype of <cite>self</cite>.
For numeric data, <code class="docutils literal notranslate"><span class="pre">np.nan</span></code> is used.
For datetime, timedelta, or period data, etc. <code class="xref py py-attr docutils literal notranslate"><span class="pre">NaT</span></code> is used.
For extension dtypes, <code class="docutils literal notranslate"><span class="pre">self.dtype.na_value</span></code> is used.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.1.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Copy of input object, shifted.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>shift with <code class="docutils literal notranslate"><span class="pre">axis=&quot;index&quot;</span></code> is only supported with <code class="docutils literal notranslate"><span class="pre">freq</span></code> specified and
<code class="docutils literal notranslate"><span class="pre">fill_value</span></code> undefined. Other configurations make this operation
order-sensitive.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">Index.shift()</span></code></dt>
<dd>Shift values of Index.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DatetimeIndex.shift()</span></code></dt>
<dd>Shift values of DatetimeIndex.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">PeriodIndex.shift()</span></code></dt>
<dd>Shift values of PeriodIndex.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.tshift" title="apache_beam.dataframe.frames.DeferredDataFrame.tshift"><code class="xref py py-meth docutils literal notranslate"><span class="pre">tshift()</span></code></a></dt>
<dd>Shift the time index, using the index’s frequency if available.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;Col1&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">10</span><span class="p">,</span> <span class="mi">20</span><span class="p">,</span> <span class="mi">15</span><span class="p">,</span> <span class="mi">30</span><span class="p">,</span> <span class="mi">45</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;Col2&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">13</span><span class="p">,</span> <span class="mi">23</span><span class="p">,</span> <span class="mi">18</span><span class="p">,</span> <span class="mi">33</span><span class="p">,</span> <span class="mi">48</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;Col3&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">17</span><span class="p">,</span> <span class="mi">27</span><span class="p">,</span> <span class="mi">22</span><span class="p">,</span> <span class="mi">37</span><span class="p">,</span> <span class="mi">52</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s2">&quot;2020-01-01&quot;</span><span class="p">,</span> <span class="s2">&quot;2020-01-05&quot;</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> Col1 Col2 Col3</span>
<span class="go">2020-01-01 10 13 17</span>
<span class="go">2020-01-02 20 23 27</span>
<span class="go">2020-01-03 15 18 22</span>
<span class="go">2020-01-04 30 33 37</span>
<span class="go">2020-01-05 45 48 52</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">shift</span><span class="p">(</span><span class="n">periods</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
<span class="go"> Col1 Col2 Col3</span>
<span class="go">2020-01-01 NaN NaN NaN</span>
<span class="go">2020-01-02 NaN NaN NaN</span>
<span class="go">2020-01-03 NaN NaN NaN</span>
<span class="go">2020-01-04 10.0 13.0 17.0</span>
<span class="go">2020-01-05 20.0 23.0 27.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">shift</span><span class="p">(</span><span class="n">periods</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="s2">&quot;columns&quot;</span><span class="p">)</span>
<span class="go"> Col1 Col2 Col3</span>
<span class="go">2020-01-01 NaN 10 13</span>
<span class="go">2020-01-02 NaN 20 23</span>
<span class="go">2020-01-03 NaN 15 18</span>
<span class="go">2020-01-04 NaN 30 33</span>
<span class="go">2020-01-05 NaN 45 48</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">shift</span><span class="p">(</span><span class="n">periods</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> Col1 Col2 Col3</span>
<span class="go">2020-01-01 0 0 0</span>
<span class="go">2020-01-02 0 0 0</span>
<span class="go">2020-01-03 0 0 0</span>
<span class="go">2020-01-04 10 13 17</span>
<span class="go">2020-01-05 20 23 27</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">shift</span><span class="p">(</span><span class="n">periods</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">freq</span><span class="o">=</span><span class="s2">&quot;D&quot;</span><span class="p">)</span>
<span class="go"> Col1 Col2 Col3</span>
<span class="go">2020-01-04 10 13 17</span>
<span class="go">2020-01-05 20 23 27</span>
<span class="go">2020-01-06 15 18 22</span>
<span class="go">2020-01-07 30 33 37</span>
<span class="go">2020-01-08 45 48 52</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">shift</span><span class="p">(</span><span class="n">periods</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">freq</span><span class="o">=</span><span class="s2">&quot;infer&quot;</span><span class="p">)</span>
<span class="go"> Col1 Col2 Col3</span>
<span class="go">2020-01-04 10 13 17</span>
<span class="go">2020-01-05 20 23 27</span>
<span class="go">2020-01-06 15 18 22</span>
<span class="go">2020-01-07 30 33 37</span>
<span class="go">2020-01-08 45 48 52</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.shape">
<code class="descname">shape</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.shape" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.shape</span></code> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.stack">
<code class="descname">stack</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.stack" title="Permalink to this definition"></a></dt>
<dd><p>Stack the prescribed level(s) from columns to index.</p>
<p>Return a reshaped DataFrame or Series having a multi-level
index with one or more new inner-most levels compared to the current
DataFrame. The new inner-most levels are created by pivoting the
columns of the current dataframe:</p>
<blockquote>
<div><ul class="simple">
<li>if the columns have a single level, the output is a Series;</li>
<li>if the columns have multiple levels, the new index
level(s) is (are) taken from the prescribed level(s) and
the output is a DataFrame.</li>
</ul>
</div></blockquote>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><em>default -1</em>) – Level(s) to stack from the column axis onto the index
axis, defined as one index or label, or a list of indices
or labels.</li>
<li><strong>dropna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Whether to drop rows in the resulting DeferredDataFrame/DeferredSeries with
missing values. Stacking a column level onto the index
axis can create combinations of index and column values
that are missing from the original dataframe. See Examples
section.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Stacked dataframe or series.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.unstack" title="apache_beam.dataframe.frames.DeferredDataFrame.unstack"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.unstack()</span></code></a></dt>
<dd>Unstack prescribed level(s) from index axis onto column axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pivot" title="apache_beam.dataframe.frames.DeferredDataFrame.pivot"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pivot()</span></code></a></dt>
<dd>Reshape dataframe from long format to wide format.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pivot_table" title="apache_beam.dataframe.frames.DeferredDataFrame.pivot_table"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pivot_table()</span></code></a></dt>
<dd>Create a spreadsheet-style pivot table as a DeferredDataFrame.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>The function is named by analogy with a collection of books
being reorganized from being side by side on a horizontal
position (the columns of the dataframe) to being stacked
vertically on top of each other (in the index of the
dataframe).</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">**Single level columns**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_single_level_cols</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;cat&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;weight&#39;</span><span class="p">,</span> <span class="s1">&#39;height&#39;</span><span class="p">])</span>
<span class="go">Stacking a dataframe with a single level column axis returns a Series:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_single_level_cols</span>
<span class="go"> weight height</span>
<span class="go">cat 0 1</span>
<span class="go">dog 2 3</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_single_level_cols</span><span class="o">.</span><span class="n">stack</span><span class="p">()</span>
<span class="go">cat weight 0</span>
<span class="go"> height 1</span>
<span class="go">dog weight 2</span>
<span class="go"> height 3</span>
<span class="go">dtype: int64</span>
<span class="go">**Multi level columns: simple case**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">multicol1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_tuples</span><span class="p">([(</span><span class="s1">&#39;weight&#39;</span><span class="p">,</span> <span class="s1">&#39;kg&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;weight&#39;</span><span class="p">,</span> <span class="s1">&#39;pounds&#39;</span><span class="p">)])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multi_level_cols1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;cat&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="n">multicol1</span><span class="p">)</span>
<span class="go">Stacking a dataframe with a multi-level column axis:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multi_level_cols1</span>
<span class="go"> weight</span>
<span class="go"> kg pounds</span>
<span class="go">cat 1 2</span>
<span class="go">dog 2 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multi_level_cols1</span><span class="o">.</span><span class="n">stack</span><span class="p">()</span>
<span class="go"> weight</span>
<span class="go">cat kg 1</span>
<span class="go"> pounds 2</span>
<span class="go">dog kg 2</span>
<span class="go"> pounds 4</span>
<span class="go">**Missing values**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">multicol2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_tuples</span><span class="p">([(</span><span class="s1">&#39;weight&#39;</span><span class="p">,</span> <span class="s1">&#39;kg&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;height&#39;</span><span class="p">,</span> <span class="s1">&#39;m&#39;</span><span class="p">)])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multi_level_cols2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">2.0</span><span class="p">],</span> <span class="p">[</span><span class="mf">3.0</span><span class="p">,</span> <span class="mf">4.0</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;cat&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="n">multicol2</span><span class="p">)</span>
<span class="go">It is common to have missing values when stacking a dataframe</span>
<span class="go">with multi-level columns, as the stacked dataframe typically</span>
<span class="go">has more values than the original dataframe. Missing values</span>
<span class="go">are filled with NaNs:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multi_level_cols2</span>
<span class="go"> weight height</span>
<span class="go"> kg m</span>
<span class="go">cat 1.0 2.0</span>
<span class="go">dog 3.0 4.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multi_level_cols2</span><span class="o">.</span><span class="n">stack</span><span class="p">()</span>
<span class="go"> height weight</span>
<span class="go">cat kg NaN 1.0</span>
<span class="go"> m 2.0 NaN</span>
<span class="go">dog kg NaN 3.0</span>
<span class="go"> m 4.0 NaN</span>
<span class="go">**Prescribing the level(s) to be stacked**</span>
<span class="go">The first parameter controls which level or levels are stacked:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multi_level_cols2</span><span class="o">.</span><span class="n">stack</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> kg m</span>
<span class="go">cat height NaN 2.0</span>
<span class="go"> weight 1.0 NaN</span>
<span class="go">dog height NaN 4.0</span>
<span class="go"> weight 3.0 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multi_level_cols2</span><span class="o">.</span><span class="n">stack</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span>
<span class="go">cat height m 2.0</span>
<span class="go"> weight kg 1.0</span>
<span class="go">dog height m 4.0</span>
<span class="go"> weight kg 3.0</span>
<span class="go">dtype: float64</span>
<span class="go">**Dropping missing values**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multi_level_cols3</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="kc">None</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">],</span> <span class="p">[</span><span class="mf">2.0</span><span class="p">,</span> <span class="mf">3.0</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;cat&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="n">multicol2</span><span class="p">)</span>
<span class="go">Note that rows where all values are missing are dropped by</span>
<span class="go">default but this behaviour can be controlled via the dropna</span>
<span class="go">keyword parameter:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multi_level_cols3</span>
<span class="go"> weight height</span>
<span class="go"> kg m</span>
<span class="go">cat NaN 1.0</span>
<span class="go">dog 2.0 3.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multi_level_cols3</span><span class="o">.</span><span class="n">stack</span><span class="p">(</span><span class="n">dropna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go"> height weight</span>
<span class="go">cat kg NaN NaN</span>
<span class="go"> m 1.0 NaN</span>
<span class="go">dog kg NaN 2.0</span>
<span class="go"> m 3.0 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multi_level_cols3</span><span class="o">.</span><span class="n">stack</span><span class="p">(</span><span class="n">dropna</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> height weight</span>
<span class="go">cat m 1.0 NaN</span>
<span class="go">dog kg NaN 2.0</span>
<span class="go"> m 3.0 NaN</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.all">
<code class="descname">all</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.all" title="Permalink to this definition"></a></dt>
<dd><p>Return whether all elements are True, potentially over an axis.</p>
<p>Returns True unless there is at least one element within a series or
along a DataFrame axis that is False or equivalent (e.g. zero or
empty).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'</em><em>, </em><em>None}</em><em>, </em><em>default 0</em>) – <p>Indicate which axis or axes should be reduced.</p>
<ul>
<li>0 / ‘index’ : reduce the index, return a DeferredSeries whose index is the
original column labels.</li>
<li>1 / ‘columns’ : reduce the columns, return a DeferredSeries whose index is the
original index.</li>
<li>None : reduce all axes, return a scalar.</li>
</ul>
</li>
<li><strong>bool_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only boolean columns. If None, will attempt to use everything,
then use only boolean data. Not implemented for DeferredSeries.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If the entire row/column is NA and skipna is
True, then the result will be True, as for an empty row/column.
If skipna is False, then NA are treated as True, because these are not
equal to zero.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>**kwargs</strong> (<em>any</em><em>, </em><em>default None</em>) – Additional keywords have no effect but might be accepted for
compatibility with NumPy.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If level is specified, then a DeferredDataFrame is returned; otherwise, a DeferredSeries
is returned.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.all" title="apache_beam.dataframe.frames.DeferredSeries.all"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.all()</span></code></a></dt>
<dd>Return True if all elements are True.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.any" title="apache_beam.dataframe.frames.DeferredDataFrame.any"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.any()</span></code></a></dt>
<dd>Return True if one (or more) elements are True.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">**Series**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">True</span><span class="p">,</span> <span class="kc">True</span><span class="p">])</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">])</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">skipna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">True</span>
<span class="go">**DataFrames**</span>
<span class="go">Create a dataframe from a dictionary.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">True</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> col1 col2</span>
<span class="go">0 True True</span>
<span class="go">1 True False</span>
<span class="go">Default behaviour checks if column-wise values all return True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">all</span><span class="p">()</span>
<span class="go">col1 True</span>
<span class="go">col2 False</span>
<span class="go">dtype: bool</span>
<span class="go">Specify ``axis=&#39;columns&#39;`` to check if row-wise values all return True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go">0 True</span>
<span class="go">1 False</span>
<span class="go">dtype: bool</span>
<span class="go">Or ``axis=None`` for whether every value is True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">all</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="go">False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.any">
<code class="descname">any</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.any" title="Permalink to this definition"></a></dt>
<dd><p>Return whether any element is True, potentially over an axis.</p>
<p>Returns False unless there is at least one element within a series or
along a DataFrame axis that is True or equivalent (e.g. non-zero or
non-empty).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'</em><em>, </em><em>None}</em><em>, </em><em>default 0</em>) – <p>Indicate which axis or axes should be reduced.</p>
<ul>
<li>0 / ‘index’ : reduce the index, return a DeferredSeries whose index is the
original column labels.</li>
<li>1 / ‘columns’ : reduce the columns, return a DeferredSeries whose index is the
original index.</li>
<li>None : reduce all axes, return a scalar.</li>
</ul>
</li>
<li><strong>bool_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only boolean columns. If None, will attempt to use everything,
then use only boolean data. Not implemented for DeferredSeries.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If the entire row/column is NA and skipna is
True, then the result will be False, as for an empty row/column.
If skipna is False, then NA are treated as True, because these are not
equal to zero.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>**kwargs</strong> (<em>any</em><em>, </em><em>default None</em>) – Additional keywords have no effect but might be accepted for
compatibility with NumPy.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If level is specified, then a DeferredDataFrame is returned; otherwise, a DeferredSeries
is returned.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.any()</span></code></dt>
<dd>Numpy version of this method.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.any" title="apache_beam.dataframe.frames.DeferredSeries.any"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.any()</span></code></a></dt>
<dd>Return whether any element is True.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.all" title="apache_beam.dataframe.frames.DeferredSeries.all"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.all()</span></code></a></dt>
<dd>Return whether all elements are True.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.any" title="apache_beam.dataframe.frames.DeferredDataFrame.any"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.any()</span></code></a></dt>
<dd>Return whether any element is True over requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.all" title="apache_beam.dataframe.frames.DeferredDataFrame.all"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.all()</span></code></a></dt>
<dd>Return whether all elements are True over requested axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">**Series**</span>
<span class="go">For Series input, the output is a scalar indicating whether any element</span>
<span class="go">is True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">False</span><span class="p">,</span> <span class="kc">False</span><span class="p">])</span><span class="o">.</span><span class="n">any</span><span class="p">()</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">])</span><span class="o">.</span><span class="n">any</span><span class="p">()</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">any</span><span class="p">()</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">any</span><span class="p">()</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">skipna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">True</span>
<span class="go">**DataFrame**</span>
<span class="go">Whether each column contains at least one True element (the default).</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;A&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s2">&quot;B&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s2">&quot;C&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B C</span>
<span class="go">0 1 0 0</span>
<span class="go">1 2 2 0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">any</span><span class="p">()</span>
<span class="go">A True</span>
<span class="go">B True</span>
<span class="go">C False</span>
<span class="go">dtype: bool</span>
<span class="go">Aggregating over the columns.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;A&quot;</span><span class="p">:</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">],</span> <span class="s2">&quot;B&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">0 True 1</span>
<span class="go">1 False 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go">0 True</span>
<span class="go">1 True</span>
<span class="go">dtype: bool</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;A&quot;</span><span class="p">:</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">],</span> <span class="s2">&quot;B&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">0 True 1</span>
<span class="go">1 False 0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go">0 True</span>
<span class="go">1 False</span>
<span class="go">dtype: bool</span>
<span class="go">Aggregating over the entire DataFrame with ``axis=None``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">any</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span>
<span class="go">True</span>
<span class="go">`any` for an empty DataFrame is an empty Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([])</span><span class="o">.</span><span class="n">any</span><span class="p">()</span>
<span class="go">Series([], dtype: bool)</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.count">
<code class="descname">count</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.count" title="Permalink to this definition"></a></dt>
<dd><p>Count non-NA cells for each column or row.</p>
<p>The values <cite>None</cite>, <cite>NaN</cite>, <cite>NaT</cite>, and optionally <cite>numpy.inf</cite> (depending
on <cite>pandas.options.mode.use_inf_as_na</cite>) are considered NA.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – If 0 or ‘index’, counts are generated for each column.
If 1 or ‘columns’, counts are generated for each row.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – If the axis is a <cite>MultiIndex</cite> (hierarchical), count along a
particular <cite>level</cite>, collapsing into a <cite>DeferredDataFrame</cite>.
A <cite>str</cite> specifies the level name.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Include only <cite>float</cite>, <cite>int</cite> or <cite>boolean</cite> data.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">For each column/row the number of non-NA/null entries.
If <cite>level</cite> is specified returns a <cite>DeferredDataFrame</cite>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.count" title="apache_beam.dataframe.frames.DeferredSeries.count"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.count()</span></code></a></dt>
<dd>Number of non-NA elements in a DeferredSeries.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.value_counts" title="apache_beam.dataframe.frames.DeferredDataFrame.value_counts"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.value_counts()</span></code></a></dt>
<dd>Count unique combinations of columns.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.shape" title="apache_beam.dataframe.frames.DeferredDataFrame.shape"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.shape()</span></code></a></dt>
<dd>Number of DeferredDataFrame rows and columns (including NA elements).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.isna" title="apache_beam.dataframe.frames.DeferredDataFrame.isna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.isna()</span></code></a></dt>
<dd>Boolean same-sized DeferredDataFrame showing places of NA elements.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Constructing DataFrame from a dictionary:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;Person&quot;</span><span class="p">:</span>
<span class="gp">... </span> <span class="p">[</span><span class="s2">&quot;John&quot;</span><span class="p">,</span> <span class="s2">&quot;Myla&quot;</span><span class="p">,</span> <span class="s2">&quot;Lewis&quot;</span><span class="p">,</span> <span class="s2">&quot;John&quot;</span><span class="p">,</span> <span class="s2">&quot;Myla&quot;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;Age&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mf">24.</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mf">21.</span><span class="p">,</span> <span class="mi">33</span><span class="p">,</span> <span class="mi">26</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;Single&quot;</span><span class="p">:</span> <span class="p">[</span><span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> Person Age Single</span>
<span class="go">0 John 24.0 False</span>
<span class="go">1 Myla NaN True</span>
<span class="go">2 Lewis 21.0 True</span>
<span class="go">3 John 33.0 True</span>
<span class="go">4 Myla 26.0 False</span>
<span class="go">Notice the uncounted NA values:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">count</span><span class="p">()</span>
<span class="go">Person 5</span>
<span class="go">Age 4</span>
<span class="go">Single 5</span>
<span class="go">dtype: int64</span>
<span class="go">Counts for each **row**:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">count</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go">0 3</span>
<span class="go">1 2</span>
<span class="go">2 3</span>
<span class="go">3 3</span>
<span class="go">4 3</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.describe">
<code class="descname">describe</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.describe" title="Permalink to this definition"></a></dt>
<dd><p>Generate descriptive statistics.</p>
<p>Descriptive statistics include those that summarize the central
tendency, dispersion and shape of a
dataset’s distribution, excluding <code class="docutils literal notranslate"><span class="pre">NaN</span></code> values.</p>
<p>Analyzes both numeric and object series, as well
as <code class="docutils literal notranslate"><span class="pre">DataFrame</span></code> column sets of mixed data types. The output
will vary depending on what is provided. Refer to the notes
below for more detail.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>percentiles</strong> (<em>list-like of numbers</em><em>, </em><em>optional</em>) – The percentiles to include in the output. All should
fall between 0 and 1. The default is
<code class="docutils literal notranslate"><span class="pre">[.25,</span> <span class="pre">.5,</span> <span class="pre">.75]</span></code>, which returns the 25th, 50th, and
75th percentiles.</li>
<li><strong>include</strong> (<em>'all'</em><em>, </em><em>list-like of dtypes</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> (</em><em>default</em><em>)</em><em>, </em><em>optional</em>) – <p>A white list of data types to include in the result. Ignored
for <code class="docutils literal notranslate"><span class="pre">DeferredSeries</span></code>. Here are the options:</p>
<ul>
<li>'all' : All columns of the input will be included in the output.</li>
<li>A list-like of dtypes : Limits the results to the
provided data types.
To limit the result to numeric types submit
<code class="docutils literal notranslate"><span class="pre">numpy.number</span></code>. To limit it instead to object columns submit
the <code class="docutils literal notranslate"><span class="pre">numpy.object</span></code> data type. Strings
can also be used in the style of
<code class="docutils literal notranslate"><span class="pre">select_dtypes</span></code> (e.g. <code class="docutils literal notranslate"><span class="pre">df.describe(include=['O'])</span></code>). To
select pandas categorical columns, use <code class="docutils literal notranslate"><span class="pre">'category'</span></code>.</li>
<li>None (default) : The result will include all numeric columns.</li>
</ul>
</li>
<li><strong>exclude</strong> (<em>list-like of dtypes</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em> (</em><em>default</em><em>)</em><em>, </em><em>optional</em>) – <p>A black list of data types to omit from the result. Ignored
for <code class="docutils literal notranslate"><span class="pre">DeferredSeries</span></code>. Here are the options:</p>
<ul>
<li>A list-like of dtypes : Excludes the provided data types
from the result. To exclude numeric types submit
<code class="docutils literal notranslate"><span class="pre">numpy.number</span></code>. To exclude object columns submit the data
type <code class="docutils literal notranslate"><span class="pre">numpy.object</span></code>. Strings can also be used in the style of
<code class="docutils literal notranslate"><span class="pre">select_dtypes</span></code> (e.g. <code class="docutils literal notranslate"><span class="pre">df.describe(exclude=['O'])</span></code>). To
exclude pandas categorical columns, use <code class="docutils literal notranslate"><span class="pre">'category'</span></code>.</li>
<li>None (default) : The result will exclude nothing.</li>
</ul>
</li>
<li><strong>datetime_is_numeric</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>Whether to treat datetime dtypes as numeric. This affects statistics
calculated for the column. For DeferredDataFrame input, this also
controls whether datetime columns are included by default.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Summary statistics of the DeferredSeries or DeferredDataFrame provided.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">describe</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.count" title="apache_beam.dataframe.frames.DeferredDataFrame.count"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.count()</span></code></a></dt>
<dd>Count number of non-NA/null observations.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.max" title="apache_beam.dataframe.frames.DeferredDataFrame.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.max()</span></code></a></dt>
<dd>Maximum of the values in the object.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.min" title="apache_beam.dataframe.frames.DeferredDataFrame.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.min()</span></code></a></dt>
<dd>Minimum of the values in the object.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mean" title="apache_beam.dataframe.frames.DeferredDataFrame.mean"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mean()</span></code></a></dt>
<dd>Mean of the values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.std" title="apache_beam.dataframe.frames.DeferredDataFrame.std"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.std()</span></code></a></dt>
<dd>Standard deviation of the observations.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.select_dtypes" title="apache_beam.dataframe.frames.DeferredDataFrame.select_dtypes"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.select_dtypes()</span></code></a></dt>
<dd>Subset of a DeferredDataFrame including/excluding columns based on their dtype.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>For numeric data, the result’s index will include <code class="docutils literal notranslate"><span class="pre">count</span></code>,
<code class="docutils literal notranslate"><span class="pre">mean</span></code>, <code class="docutils literal notranslate"><span class="pre">std</span></code>, <code class="docutils literal notranslate"><span class="pre">min</span></code>, <code class="docutils literal notranslate"><span class="pre">max</span></code> as well as lower, <code class="docutils literal notranslate"><span class="pre">50</span></code> and
upper percentiles. By default the lower percentile is <code class="docutils literal notranslate"><span class="pre">25</span></code> and the
upper percentile is <code class="docutils literal notranslate"><span class="pre">75</span></code>. The <code class="docutils literal notranslate"><span class="pre">50</span></code> percentile is the
same as the median.</p>
<p>For object data (e.g. strings or timestamps), the result’s index
will include <code class="docutils literal notranslate"><span class="pre">count</span></code>, <code class="docutils literal notranslate"><span class="pre">unique</span></code>, <code class="docutils literal notranslate"><span class="pre">top</span></code>, and <code class="docutils literal notranslate"><span class="pre">freq</span></code>. The <code class="docutils literal notranslate"><span class="pre">top</span></code>
is the most common value. The <code class="docutils literal notranslate"><span class="pre">freq</span></code> is the most common value’s
frequency. Timestamps also include the <code class="docutils literal notranslate"><span class="pre">first</span></code> and <code class="docutils literal notranslate"><span class="pre">last</span></code> items.</p>
<p>If multiple object values have the highest count, then the
<code class="docutils literal notranslate"><span class="pre">count</span></code> and <code class="docutils literal notranslate"><span class="pre">top</span></code> results will be arbitrarily chosen from
among those with the highest count.</p>
<p>For mixed data types provided via a <code class="docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code>, the default is to
return only an analysis of numeric columns. If the dataframe consists
only of object and categorical data without any numeric columns, the
default is to return an analysis of both the object and categorical
columns. If <code class="docutils literal notranslate"><span class="pre">include='all'</span></code> is provided as an option, the result
will include a union of attributes of each type.</p>
<p>The <cite>include</cite> and <cite>exclude</cite> parameters can be used to limit
which columns in a <code class="docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code> are analyzed for the output.
The parameters are ignored when analyzing a <code class="docutils literal notranslate"><span class="pre">DeferredSeries</span></code>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Describing a numeric ``Series``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">describe</span><span class="p">()</span>
<span class="go">count 3.0</span>
<span class="go">mean 2.0</span>
<span class="go">std 1.0</span>
<span class="go">min 1.0</span>
<span class="go">25% 1.5</span>
<span class="go">50% 2.0</span>
<span class="go">75% 2.5</span>
<span class="go">max 3.0</span>
<span class="go">dtype: float64</span>
<span class="go">Describing a categorical ``Series``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">describe</span><span class="p">()</span>
<span class="go">count 4</span>
<span class="go">unique 3</span>
<span class="go">top a</span>
<span class="go">freq 2</span>
<span class="go">dtype: object</span>
<span class="go">Describing a timestamp ``Series``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span>
<span class="gp">... </span> <span class="n">np</span><span class="o">.</span><span class="n">datetime64</span><span class="p">(</span><span class="s2">&quot;2000-01-01&quot;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">np</span><span class="o">.</span><span class="n">datetime64</span><span class="p">(</span><span class="s2">&quot;2010-01-01&quot;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">np</span><span class="o">.</span><span class="n">datetime64</span><span class="p">(</span><span class="s2">&quot;2010-01-01&quot;</span><span class="p">)</span>
<span class="gp">... </span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">datetime_is_numeric</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go">count 3</span>
<span class="go">mean 2006-09-01 08:00:00</span>
<span class="go">min 2000-01-01 00:00:00</span>
<span class="go">25% 2004-12-31 12:00:00</span>
<span class="go">50% 2010-01-01 00:00:00</span>
<span class="go">75% 2010-01-01 00:00:00</span>
<span class="go">max 2010-01-01 00:00:00</span>
<span class="go">dtype: object</span>
<span class="go">Describing a ``DataFrame``. By default only numeric fields</span>
<span class="go">are returned.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;categorical&#39;</span><span class="p">:</span> <span class="n">pd</span><span class="o">.</span><span class="n">Categorical</span><span class="p">([</span><span class="s1">&#39;d&#39;</span><span class="p">,</span><span class="s1">&#39;e&#39;</span><span class="p">,</span><span class="s1">&#39;f&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="s1">&#39;numeric&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;object&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">]</span>
<span class="gp">... </span> <span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">()</span>
<span class="go"> numeric</span>
<span class="go">count 3.0</span>
<span class="go">mean 2.0</span>
<span class="go">std 1.0</span>
<span class="go">min 1.0</span>
<span class="go">25% 1.5</span>
<span class="go">50% 2.0</span>
<span class="go">75% 2.5</span>
<span class="go">max 3.0</span>
<span class="go">Describing all columns of a ``DataFrame`` regardless of data type.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="s1">&#39;all&#39;</span><span class="p">)</span>
<span class="go"> categorical numeric object</span>
<span class="go">count 3 3.0 3</span>
<span class="go">unique 3 NaN 3</span>
<span class="go">top f NaN a</span>
<span class="go">freq 1 NaN 1</span>
<span class="go">mean NaN 2.0 NaN</span>
<span class="go">std NaN 1.0 NaN</span>
<span class="go">min NaN 1.0 NaN</span>
<span class="go">25% NaN 1.5 NaN</span>
<span class="go">50% NaN 2.0 NaN</span>
<span class="go">75% NaN 2.5 NaN</span>
<span class="go">max NaN 3.0 NaN</span>
<span class="go">Describing a column from a ``DataFrame`` by accessing it as</span>
<span class="go">an attribute.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">numeric</span><span class="o">.</span><span class="n">describe</span><span class="p">()</span>
<span class="go">count 3.0</span>
<span class="go">mean 2.0</span>
<span class="go">std 1.0</span>
<span class="go">min 1.0</span>
<span class="go">25% 1.5</span>
<span class="go">50% 2.0</span>
<span class="go">75% 2.5</span>
<span class="go">max 3.0</span>
<span class="go">Name: numeric, dtype: float64</span>
<span class="go">Including only numeric columns in a ``DataFrame`` description.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">number</span><span class="p">])</span>
<span class="go"> numeric</span>
<span class="go">count 3.0</span>
<span class="go">mean 2.0</span>
<span class="go">std 1.0</span>
<span class="go">min 1.0</span>
<span class="go">25% 1.5</span>
<span class="go">50% 2.0</span>
<span class="go">75% 2.5</span>
<span class="go">max 3.0</span>
<span class="go">Including only string columns in a ``DataFrame`` description.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="p">[</span><span class="nb">object</span><span class="p">])</span>
<span class="go"> object</span>
<span class="go">count 3</span>
<span class="go">unique 3</span>
<span class="go">top a</span>
<span class="go">freq 1</span>
<span class="go">Including only categorical columns from a ``DataFrame`` description.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">include</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;category&#39;</span><span class="p">])</span>
<span class="go"> categorical</span>
<span class="go">count 3</span>
<span class="go">unique 3</span>
<span class="go">top d</span>
<span class="go">freq 1</span>
<span class="go">Excluding numeric columns from a ``DataFrame`` description.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">exclude</span><span class="o">=</span><span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">number</span><span class="p">])</span>
<span class="go"> categorical object</span>
<span class="go">count 3 3</span>
<span class="go">unique 3 3</span>
<span class="go">top f a</span>
<span class="go">freq 1 1</span>
<span class="go">Excluding object columns from a ``DataFrame`` description.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">describe</span><span class="p">(</span><span class="n">exclude</span><span class="o">=</span><span class="p">[</span><span class="nb">object</span><span class="p">])</span>
<span class="go"> categorical numeric</span>
<span class="go">count 3 3.0</span>
<span class="go">unique 3 NaN</span>
<span class="go">top f NaN</span>
<span class="go">freq 1 NaN</span>
<span class="go">mean NaN 2.0</span>
<span class="go">std NaN 1.0</span>
<span class="go">min NaN 1.0</span>
<span class="go">25% NaN 1.5</span>
<span class="go">50% NaN 2.0</span>
<span class="go">75% NaN 2.5</span>
<span class="go">max NaN 3.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.max">
<code class="descname">max</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.max" title="Permalink to this definition"></a></dt>
<dd><p>Return the maximum of the values over the requested axis.</p>
<p>If you want the <em>index</em> of the maximum, use <code class="docutils literal notranslate"><span class="pre">idxmax</span></code>, which is the equivalent of the <code class="docutils literal notranslate"><span class="pre">numpy.ndarray</span></code> method <code class="docutils literal notranslate"><span class="pre">argmax</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), compute the maximum along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">The maximum of the values over the requested axis.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sum" title="apache_beam.dataframe.frames.DeferredSeries.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sum()</span></code></a></dt>
<dd>Return the sum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.min" title="apache_beam.dataframe.frames.DeferredSeries.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.min()</span></code></a></dt>
<dd>Return the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.max" title="apache_beam.dataframe.frames.DeferredSeries.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.max()</span></code></a></dt>
<dd>Return the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmin" title="apache_beam.dataframe.frames.DeferredSeries.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmax" title="apache_beam.dataframe.frames.DeferredSeries.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sum" title="apache_beam.dataframe.frames.DeferredDataFrame.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sum()</span></code></a></dt>
<dd>Return the sum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.min" title="apache_beam.dataframe.frames.DeferredDataFrame.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.min()</span></code></a></dt>
<dd>Return the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.max" title="apache_beam.dataframe.frames.DeferredDataFrame.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.max()</span></code></a></dt>
<dd>Return the maximum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmin" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmax" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum over the requested axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">idx</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">([</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;warm&#39;</span><span class="p">,</span> <span class="s1">&#39;warm&#39;</span><span class="p">,</span> <span class="s1">&#39;cold&#39;</span><span class="p">,</span> <span class="s1">&#39;cold&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;fish&#39;</span><span class="p">,</span> <span class="s1">&#39;spider&#39;</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;blooded&#39;</span><span class="p">,</span> <span class="s1">&#39;animal&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">8</span><span class="p">],</span> <span class="n">name</span><span class="o">=</span><span class="s1">&#39;legs&#39;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="n">idx</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">blooded  animal</span>
<span class="go">warm     dog       4</span>
<span class="go">         falcon    2</span>
<span class="go">cold     fish      0</span>
<span class="go">         spider    8</span>
<span class="go">Name: legs, dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">max</span><span class="p">()</span>
<span class="go">8</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.min">
<code class="descname">min</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.min" title="Permalink to this definition"></a></dt>
<dd><p>Return the minimum of the values over the requested axis.</p>
<p>If you want the <em>index</em> of the minimum, use <code class="docutils literal notranslate"><span class="pre">idxmin</span></code>, which is the equivalent of the <code class="docutils literal notranslate"><span class="pre">numpy.ndarray</span></code> method <code class="docutils literal notranslate"><span class="pre">argmin</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), compute the minimum along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">The minimum of the values over the requested axis.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sum" title="apache_beam.dataframe.frames.DeferredSeries.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sum()</span></code></a></dt>
<dd>Return the sum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.min" title="apache_beam.dataframe.frames.DeferredSeries.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.min()</span></code></a></dt>
<dd>Return the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.max" title="apache_beam.dataframe.frames.DeferredSeries.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.max()</span></code></a></dt>
<dd>Return the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmin" title="apache_beam.dataframe.frames.DeferredSeries.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmax" title="apache_beam.dataframe.frames.DeferredSeries.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sum" title="apache_beam.dataframe.frames.DeferredDataFrame.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sum()</span></code></a></dt>
<dd>Return the sum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.min" title="apache_beam.dataframe.frames.DeferredDataFrame.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.min()</span></code></a></dt>
<dd>Return the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.max" title="apache_beam.dataframe.frames.DeferredDataFrame.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.max()</span></code></a></dt>
<dd>Return the maximum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmin" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmax" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum over the requested axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">idx</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">([</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;warm&#39;</span><span class="p">,</span> <span class="s1">&#39;warm&#39;</span><span class="p">,</span> <span class="s1">&#39;cold&#39;</span><span class="p">,</span> <span class="s1">&#39;cold&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;fish&#39;</span><span class="p">,</span> <span class="s1">&#39;spider&#39;</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;blooded&#39;</span><span class="p">,</span> <span class="s1">&#39;animal&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">8</span><span class="p">],</span> <span class="n">name</span><span class="o">=</span><span class="s1">&#39;legs&#39;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="n">idx</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">blooded  animal</span>
<span class="go">warm     dog       4</span>
<span class="go">         falcon    2</span>
<span class="go">cold     fish      0</span>
<span class="go">         spider    8</span>
<span class="go">Name: legs, dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">min</span><span class="p">()</span>
<span class="go">0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.prod">
<code class="descname">prod</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.prod" title="Permalink to this definition"></a></dt>
<dd><p>Return the product of the values over the requested axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), compute the product along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>min_count</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 0</em>) – The required number of valid values to perform the operation. If fewer than
<code class="docutils literal notranslate"><span class="pre">min_count</span></code> non-NA values are present the result will be NA.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">The product of the values over the requested axis.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sum" title="apache_beam.dataframe.frames.DeferredSeries.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sum()</span></code></a></dt>
<dd>Return the sum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.min" title="apache_beam.dataframe.frames.DeferredSeries.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.min()</span></code></a></dt>
<dd>Return the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.max" title="apache_beam.dataframe.frames.DeferredSeries.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.max()</span></code></a></dt>
<dd>Return the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmin" title="apache_beam.dataframe.frames.DeferredSeries.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmax" title="apache_beam.dataframe.frames.DeferredSeries.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sum" title="apache_beam.dataframe.frames.DeferredDataFrame.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sum()</span></code></a></dt>
<dd>Return the sum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.min" title="apache_beam.dataframe.frames.DeferredDataFrame.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.min()</span></code></a></dt>
<dd>Return the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.max" title="apache_beam.dataframe.frames.DeferredDataFrame.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.max()</span></code></a></dt>
<dd>Return the maximum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmin" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmax" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum over the requested axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<p>By default, the product of an empty or all-NA Series is <code class="docutils literal notranslate"><span class="pre">1</span></code>.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">prod</span><span class="p">()</span>
<span class="go">1.0</span>
</pre></div>
</div>
<p>This can be controlled with the <code class="docutils literal notranslate"><span class="pre">min_count</span></code> parameter.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">prod</span><span class="p">(</span><span class="n">min_count</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">nan</span>
</pre></div>
</div>
<p>Thanks to the <code class="docutils literal notranslate"><span class="pre">skipna</span></code> parameter, <code class="docutils literal notranslate"><span class="pre">min_count</span></code> handles all-NA and
empty series identically.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">prod</span><span class="p">()</span>
<span class="go">1.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">prod</span><span class="p">(</span><span class="n">min_count</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">nan</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.product">
<code class="descname">product</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.product" title="Permalink to this definition"></a></dt>
<dd><p>Return the product of the values over the requested axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), compute the product along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>min_count</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 0</em>) – The required number of valid values to perform the operation. If fewer than
<code class="docutils literal notranslate"><span class="pre">min_count</span></code> non-NA values are present the result will be NA.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first last">The product of the values over the requested axis.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sum" title="apache_beam.dataframe.frames.DeferredSeries.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sum()</span></code></a></dt>
<dd>Return the sum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.min" title="apache_beam.dataframe.frames.DeferredSeries.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.min()</span></code></a></dt>
<dd>Return the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.max" title="apache_beam.dataframe.frames.DeferredSeries.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.max()</span></code></a></dt>
<dd>Return the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmin" title="apache_beam.dataframe.frames.DeferredSeries.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmax" title="apache_beam.dataframe.frames.DeferredSeries.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sum" title="apache_beam.dataframe.frames.DeferredDataFrame.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sum()</span></code></a></dt>
<dd>Return the sum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.min" title="apache_beam.dataframe.frames.DeferredDataFrame.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.min()</span></code></a></dt>
<dd>Return the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.max" title="apache_beam.dataframe.frames.DeferredDataFrame.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.max()</span></code></a></dt>
<dd>Return the maximum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmin" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmax" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum over the requested axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">By default, the product of an empty or all-NA Series is ``1``</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">prod</span><span class="p">()</span>
<span class="go">1.0</span>
<span class="go">This can be controlled with the ``min_count`` parameter</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">prod</span><span class="p">(</span><span class="n">min_count</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">nan</span>
<span class="go">Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and</span>
<span class="go">empty series identically.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">prod</span><span class="p">()</span>
<span class="go">1.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">prod</span><span class="p">(</span><span class="n">min_count</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">nan</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.sum">
<code class="descname">sum</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.sum" title="Permalink to this definition"></a></dt>
<dd><p>Return the sum of the values over the requested axis.</p>
<p>This is equivalent to the method <code class="docutils literal notranslate"><span class="pre">numpy.sum</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>min_count</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 0</em>) – The required number of valid values to perform the operation. If fewer than
<code class="docutils literal notranslate"><span class="pre">min_count</span></code> non-NA values are present the result will be NA.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sum" title="apache_beam.dataframe.frames.DeferredSeries.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sum()</span></code></a></dt>
<dd>Return the sum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.min" title="apache_beam.dataframe.frames.DeferredSeries.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.min()</span></code></a></dt>
<dd>Return the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.max" title="apache_beam.dataframe.frames.DeferredSeries.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.max()</span></code></a></dt>
<dd>Return the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmin" title="apache_beam.dataframe.frames.DeferredSeries.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmax" title="apache_beam.dataframe.frames.DeferredSeries.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sum" title="apache_beam.dataframe.frames.DeferredDataFrame.sum"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sum()</span></code></a></dt>
<dd>Return the sum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.min" title="apache_beam.dataframe.frames.DeferredDataFrame.min"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.min()</span></code></a></dt>
<dd>Return the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.max" title="apache_beam.dataframe.frames.DeferredDataFrame.max"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.max()</span></code></a></dt>
<dd>Return the maximum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmin" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmin()</span></code></a></dt>
<dd>Return the index of the minimum over the requested axis.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmax" title="apache_beam.dataframe.frames.DeferredDataFrame.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.idxmax()</span></code></a></dt>
<dd>Return the index of the maximum over the requested axis.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">idx</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">([</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;warm&#39;</span><span class="p">,</span> <span class="s1">&#39;warm&#39;</span><span class="p">,</span> <span class="s1">&#39;cold&#39;</span><span class="p">,</span> <span class="s1">&#39;cold&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;fish&#39;</span><span class="p">,</span> <span class="s1">&#39;spider&#39;</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;blooded&#39;</span><span class="p">,</span> <span class="s1">&#39;animal&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">8</span><span class="p">],</span> <span class="n">name</span><span class="o">=</span><span class="s1">&#39;legs&#39;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="n">idx</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">blooded animal</span>
<span class="go">warm dog 4</span>
<span class="go"> falcon 2</span>
<span class="go">cold fish 0</span>
<span class="go"> spider 8</span>
<span class="go">Name: legs, dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="go">14</span>
<span class="go">By default, the sum of an empty or all-NA Series is ``0``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="c1"># min_count=0 is the default</span>
<span class="go">0.0</span>
<span class="go">This can be controlled with the ``min_count`` parameter. For example, if</span>
<span class="go">you&#39;d like the sum of an empty series to be NaN, pass ``min_count=1``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([],</span> <span class="n">dtype</span><span class="o">=</span><span class="s2">&quot;float64&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">min_count</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">nan</span>
<span class="go">Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and</span>
<span class="go">empty series identically.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="go">0.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">])</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">min_count</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">nan</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.mean">
<code class="descname">mean</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.mean" title="Permalink to this definition"></a></dt>
<dd><p>Return the mean of the values over the requested axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">mean</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.median">
<code class="descname">median</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.median" title="Permalink to this definition"></a></dt>
<dd><p>Return the median of the values over the requested axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">median</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.nunique">
<code class="descname">nunique</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.nunique" title="Permalink to this definition"></a></dt>
<dd><p>Count number of distinct elements in specified axis.</p>
<p>Return Series with number of distinct elements. Can ignore NaN
values.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – The axis to use. 0 or ‘index’ for row-wise, 1 or ‘columns’ for
column-wise.</li>
<li><strong>dropna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Don’t include NaN in the counts.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">nunique</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.nunique" title="apache_beam.dataframe.frames.DeferredSeries.nunique"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.nunique()</span></code></a></dt>
<dd>Method nunique for DeferredSeries.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.count" title="apache_beam.dataframe.frames.DeferredDataFrame.count"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.count()</span></code></a></dt>
<dd>Count non-NA cells for each column or row.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">nunique</span><span class="p">()</span>
<span class="go">A 3</span>
<span class="go">B 2</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">nunique</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">2 2</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.std">
<code class="descname">std</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.std" title="Permalink to this definition"></a></dt>
<dd><p>Return sample standard deviation over requested axis.</p>
<p>Normalized by N-1 by default. This can be changed using the ddof argument.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If an entire row/column is NA, the result
will be NA.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>ddof</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 1</em>) – Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
where N represents the number of elements.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">std</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
<p class="rubric">Notes</p>
<p>To have the same behaviour as <cite>numpy.std</cite>, use <cite>ddof=0</cite> (instead of the
default <cite>ddof=1</cite>).</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.var">
<code class="descname">var</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.var" title="Permalink to this definition"></a></dt>
<dd><p>Return unbiased variance over requested axis.</p>
<p>Normalized by N-1 by default. This can be changed using the ddof argument.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If an entire row/column is NA, the result
will be NA.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>ddof</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 1</em>) – Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
where N represents the number of elements.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">var</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
<p class="rubric">Notes</p>
<p>To have the same behaviour as <cite>numpy.var</cite>, use <cite>ddof=0</cite> (instead of the
default <cite>ddof=1</cite>).</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.sem">
<code class="descname">sem</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.sem" title="Permalink to this definition"></a></dt>
<dd><p>Return unbiased standard error of the mean over requested axis.</p>
<p>Normalized by N-1 by default. This can be changed using the ddof argument.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If an entire row/column is NA, the result
will be NA.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>ddof</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 1</em>) – Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
where N represents the number of elements.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">sem</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
<p class="rubric">Notes</p>
<p>To have the same behaviour as <cite>numpy.std</cite>, use <cite>ddof=0</cite> (instead of the
default <cite>ddof=1</cite>).</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.mad">
<code class="descname">mad</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.mad" title="Permalink to this definition"></a></dt>
<dd><p>Return the mean absolute deviation of the values over the requested axis.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a DeferredSeries.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">mad</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.skew">
<code class="descname">skew</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.skew" title="Permalink to this definition"></a></dt>
<dd><p>Return unbiased skew over requested axis.</p>
<p>Normalized by N-1.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">skew</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.kurt">
<code class="descname">kurt</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.kurt" title="Permalink to this definition"></a></dt>
<dd><p>Return unbiased kurtosis over requested axis.</p>
<p>Kurtosis obtained using Fisher’s definition of
kurtosis (kurtosis of normal == 0.0). Normalized by N-1.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">kurt</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.kurtosis">
<code class="descname">kurtosis</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.kurtosis" title="Permalink to this definition"></a></dt>
<dd><p>Return unbiased kurtosis over requested axis.</p>
<p>Kurtosis obtained using Fisher’s definition of
kurtosis (kurtosis of normal == 0.0). Normalized by N-1.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{index</em><em> (</em><em>0</em><em>)</em><em>, </em><em>columns</em><em> (</em><em>1</em><em>)</em><em>}</em>) – Axis for the function to be applied on.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values when computing the result.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), count along a
particular level, collapsing into a DeferredSeries.</li>
<li><strong>numeric_only</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – Include only float, int, boolean columns. If None, will attempt to use
everything, then use only numeric data. Not implemented for DeferredSeries.</li>
<li><strong>**kwargs</strong> – Additional keyword arguments to be passed to the function.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> (if level specified)</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">kurtosis</span></code> cannot currently be parallelized. It will require collecting all data on a single node.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.take">
<code class="descname">take</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.take" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.take.html#pandas.DataFrame.take" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.take()</span></code></a> is not yet supported in the Beam DataFrame API because it is deprecated in pandas.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_records">
<code class="descname">to_records</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_records" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_records.html#pandas.DataFrame.to_records" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_records()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_dict">
<code class="descname">to_dict</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_dict" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_dict.html#pandas.DataFrame.to_dict" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_dict()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_numpy">
<code class="descname">to_numpy</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_numpy" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_numpy.html#pandas.DataFrame.to_numpy" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_numpy()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_string">
<code class="descname">to_string</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_string" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_string.html#pandas.DataFrame.to_string" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_string()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_sparse">
<code class="descname">to_sparse</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_sparse" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_sparse()</span></code> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.transpose">
<code class="descname">transpose</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.transpose" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.transpose.html#pandas.DataFrame.transpose" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.transpose()</span></code></a> is not yet supported in the Beam DataFrame API because the columns in the output DataFrame depend on the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-columns">https://s.apache.org/dataframe-non-deferred-columns</a>.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.T">
<code class="descname">T</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.T" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.T</span></code> is not yet supported in the Beam DataFrame API because the columns in the output DataFrame depend on the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-columns">https://s.apache.org/dataframe-non-deferred-columns</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.unstack">
<code class="descname">unstack</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.unstack"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.unstack" title="Permalink to this definition"></a></dt>
<dd><p>Pivot a level of the (necessarily hierarchical) index labels.</p>
<p>Returns a DataFrame having a new level of column labels whose inner-most level
consists of the pivoted index labels.</p>
<p>If the index is not a MultiIndex, the output will be a Series
(the analogue of stack when the columns are not a MultiIndex).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, or </em><em>list of these</em><em>, </em><em>default -1</em><em> (</em><em>last level</em><em>)</em>) – Level(s) of index to unstack, can pass level name.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a>) – Replace NaN with this value if the unstack produces missing values.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>unstack cannot be used on <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code></a> instances with
multiple index levels, because the columns in the output depend on the
data.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pivot" title="apache_beam.dataframe.frames.DeferredDataFrame.pivot"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pivot()</span></code></a></dt>
<dd>Pivot a table based on column values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.stack" title="apache_beam.dataframe.frames.DeferredDataFrame.stack"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.stack()</span></code></a></dt>
<dd>Pivot a level of the column labels (inverse operation from <cite>unstack</cite>).</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">index</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_tuples</span><span class="p">([(</span><span class="s1">&#39;one&#39;</span><span class="p">,</span> <span class="s1">&#39;a&#39;</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;one&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;two&#39;</span><span class="p">,</span> <span class="s1">&#39;a&#39;</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;two&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">)])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">5.0</span><span class="p">),</span> <span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">one a 1.0</span>
<span class="go"> b 2.0</span>
<span class="go">two a 3.0</span>
<span class="go"> b 4.0</span>
<span class="go">dtype: float64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">unstack</span><span class="p">(</span><span class="n">level</span><span class="o">=-</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> a b</span>
<span class="go">one 1.0 2.0</span>
<span class="go">two 3.0 4.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">unstack</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> one two</span>
<span class="go">a 1.0 3.0</span>
<span class="go">b 2.0 4.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">unstack</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">unstack</span><span class="p">()</span>
<span class="go">one a 1.0</span>
<span class="go"> b 2.0</span>
<span class="go">two a 3.0</span>
<span class="go"> b 4.0</span>
<span class="go">dtype: float64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.update">
<code class="descname">update</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.update" title="Permalink to this definition"></a></dt>
<dd><p>Modify in place using non-NA values from another DataFrame.</p>
<p>Aligns on indices. There is no return value.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a><em>, or </em><em>object coercible into a DeferredDataFrame</em>) – Should have at least one matching index/column label
with the original DeferredDataFrame. If a DeferredSeries is passed,
its name attribute must be set, and that will be
used as the column name to align with the original DeferredDataFrame.</li>
<li><strong>join</strong> (<em>{'left'}</em><em>, </em><em>default 'left'</em>) – Only left join is implemented, keeping the index and columns of the
original object.</li>
<li><strong>overwrite</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – <p>How to handle non-NA values for overlapping keys:</p>
<ul>
<li>True: overwrite original DeferredDataFrame’s values
with values from <cite>other</cite>.</li>
<li>False: only update values that are NA in
the original DeferredDataFrame.</li>
</ul>
</li>
<li><strong>filter_func</strong> (<em>callable</em><em>(</em><em>1d-array</em><em>) </em><em>-&gt; bool 1d-array</em><em>, </em><em>optional</em>) – Can choose to replace values other than NA. Return True for values
that should be updated.</li>
<li><strong>errors</strong> (<em>{'raise'</em><em>, </em><em>'ignore'}</em><em>, </em><em>default 'ignore'</em>) – If ‘raise’, will raise a ValueError if the DeferredDataFrame and <cite>other</cite>
both contain non-NA data in the same place.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><strong>None</strong></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first">method directly changes calling object</p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><ul class="first last simple">
<li><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> – <p>Raised in either of the following cases:</p>
<ul>
<li>When <cite>errors='raise'</cite> and there’s overlapping non-NA data.</li>
<li>When <cite>errors</cite> is neither <cite>'ignore'</cite> nor <cite>'raise'</cite>.</li>
</ul>
</li>
<li><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#NotImplementedError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">NotImplementedError</span></code></a> – If <cite>join != 'left'</cite>.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict.update" title="(in Python v3.10)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">dict.update()</span></code></a></dt>
<dd>Similar method for dictionaries.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.merge" title="apache_beam.dataframe.frames.DeferredDataFrame.merge"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.merge()</span></code></a></dt>
<dd>For column(s)-on-column(s) operations.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">400</span><span class="p">,</span> <span class="mi">500</span><span class="p">,</span> <span class="mi">600</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">new_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">new_df</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">0 1 4</span>
<span class="go">1 2 5</span>
<span class="go">2 3 6</span>
<span class="go">The DataFrame&#39;s length does not increase as a result of the update,</span>
<span class="go">only values at matching index/column labels are updated.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;x&#39;</span><span class="p">,</span> <span class="s1">&#39;y&#39;</span><span class="p">,</span> <span class="s1">&#39;z&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">new_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">,</span> <span class="s1">&#39;f&#39;</span><span class="p">,</span> <span class="s1">&#39;g&#39;</span><span class="p">,</span> <span class="s1">&#39;h&#39;</span><span class="p">,</span> <span class="s1">&#39;i&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">new_df</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">0 a d</span>
<span class="go">1 b e</span>
<span class="go">2 c f</span>
<span class="go">For Series, its name attribute must be set.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;x&#39;</span><span class="p">,</span> <span class="s1">&#39;y&#39;</span><span class="p">,</span> <span class="s1">&#39;z&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">new_column</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">],</span> <span class="n">name</span><span class="o">=</span><span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">new_column</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">0 a d</span>
<span class="go">1 b y</span>
<span class="go">2 c e</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;x&#39;</span><span class="p">,</span> <span class="s1">&#39;y&#39;</span><span class="p">,</span> <span class="s1">&#39;z&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">new_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">]},</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">new_df</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">0 a x</span>
<span class="go">1 b d</span>
<span class="go">2 c e</span>
<span class="go">If `other` contains NaNs the corresponding values are not updated</span>
<span class="go">in the original dataframe.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">400</span><span class="p">,</span> <span class="mi">500</span><span class="p">,</span> <span class="mi">600</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">new_df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">6</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">new_df</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">0 1 4.0</span>
<span class="go">1 2 500.0</span>
<span class="go">2 3 6.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.values">
<code class="descname">values</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.values" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.values</span></code> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.style">
<code class="descname">style</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.style" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.style</span></code> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.melt">
<code class="descname">melt</code><span class="sig-paren">(</span><em>ignore_index</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.melt"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.melt" title="Permalink to this definition"></a></dt>
<dd><p>Unpivot a DataFrame from wide to long format, optionally leaving identifiers set.</p>
<p>This function is useful to massage a DataFrame into a format where one
or more columns are identifier variables (<cite>id_vars</cite>), while all other
columns, considered measured variables (<cite>value_vars</cite>), are “unpivoted” to
the row axis, leaving just two non-identifier columns, ‘variable’ and
‘value’.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>id_vars</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.10)"><em>tuple</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, or </em><em>ndarray</em><em>, </em><em>optional</em>) – Column(s) to use as identifier variables.</li>
<li><strong>value_vars</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.10)"><em>tuple</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, or </em><em>ndarray</em><em>, </em><em>optional</em>) – Column(s) to unpivot. If not specified, uses all columns that
are not set as <cite>id_vars</cite>.</li>
<li><strong>var_name</strong> (<em>scalar</em>) – Name to use for the ‘variable’ column. If None it uses
<code class="docutils literal notranslate"><span class="pre">frame.columns.name</span></code> or ‘variable’.</li>
<li><strong>value_name</strong> (<em>scalar</em><em>, </em><em>default 'value'</em>) – Name to use for the ‘value’ column.</li>
<li><strong>col_level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – If columns are a MultiIndex then use this level to melt.</li>
<li><strong>ignore_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – <p>If True, original index is ignored. If False, the original index is retained.
Index labels will be repeated as necessary.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Unpivoted DeferredDataFrame.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">ignore_index=True</span></code> is not supported, because it requires generating an
order-sensitive index.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.melt" title="apache_beam.dataframe.frames.DeferredDataFrame.melt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">melt()</span></code></a></dt>
<dd>Identical method.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pivot_table" title="apache_beam.dataframe.frames.DeferredDataFrame.pivot_table"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pivot_table()</span></code></a></dt>
<dd>Create a spreadsheet-style pivot table as a DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pivot" title="apache_beam.dataframe.frames.DeferredDataFrame.pivot"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pivot()</span></code></a></dt>
<dd>Return reshaped DeferredDataFrame organized by given index / column values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.explode" title="apache_beam.dataframe.frames.DeferredDataFrame.explode"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.explode()</span></code></a></dt>
<dd>Explode a DeferredDataFrame from list-like columns to long format.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="s1">&#39;c&#39;</span><span class="p">},</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="mi">5</span><span class="p">},</span>
<span class="gp">... </span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="p">:</span> <span class="mi">6</span><span class="p">}})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B C</span>
<span class="go">0 a 1 2</span>
<span class="go">1 b 3 4</span>
<span class="go">2 c 5 6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">melt</span><span class="p">(</span><span class="n">id_vars</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">],</span> <span class="n">value_vars</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;B&#39;</span><span class="p">])</span>
<span class="go"> A variable value</span>
<span class="go">0 a B 1</span>
<span class="go">1 b B 3</span>
<span class="go">2 c B 5</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">melt</span><span class="p">(</span><span class="n">id_vars</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">],</span> <span class="n">value_vars</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">])</span>
<span class="go"> A variable value</span>
<span class="go">0 a B 1</span>
<span class="go">1 b B 3</span>
<span class="go">2 c B 5</span>
<span class="go">3 a C 2</span>
<span class="go">4 b C 4</span>
<span class="go">5 c C 6</span>
<span class="go">The names of &#39;variable&#39; and &#39;value&#39; columns can be customized:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">melt</span><span class="p">(</span><span class="n">id_vars</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">],</span> <span class="n">value_vars</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">var_name</span><span class="o">=</span><span class="s1">&#39;myVarname&#39;</span><span class="p">,</span> <span class="n">value_name</span><span class="o">=</span><span class="s1">&#39;myValname&#39;</span><span class="p">)</span>
<span class="go"> A myVarname myValname</span>
<span class="go">0 a B 1</span>
<span class="go">1 b B 3</span>
<span class="go">2 c B 5</span>
<span class="go">Original index values can be kept around:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">melt</span><span class="p">(</span><span class="n">id_vars</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">],</span> <span class="n">value_vars</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">],</span> <span class="n">ignore_index</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go"> A variable value</span>
<span class="go">0 a B 1</span>
<span class="go">1 b B 3</span>
<span class="go">2 c B 5</span>
<span class="go">0 a C 2</span>
<span class="go">1 b C 4</span>
<span class="go">2 c C 6</span>
<span class="go">If you have multi-index columns:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="p">[</span><span class="nb">list</span><span class="p">(</span><span class="s1">&#39;ABC&#39;</span><span class="p">),</span> <span class="nb">list</span><span class="p">(</span><span class="s1">&#39;DEF&#39;</span><span class="p">)]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B C</span>
<span class="go"> D E F</span>
<span class="go">0 a 1 2</span>
<span class="go">1 b 3 4</span>
<span class="go">2 c 5 6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">melt</span><span class="p">(</span><span class="n">col_level</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">id_vars</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">],</span> <span class="n">value_vars</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;B&#39;</span><span class="p">])</span>
<span class="go"> A variable value</span>
<span class="go">0 a B 1</span>
<span class="go">1 b B 3</span>
<span class="go">2 c B 5</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">melt</span><span class="p">(</span><span class="n">id_vars</span><span class="o">=</span><span class="p">[(</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;D&#39;</span><span class="p">)],</span> <span class="n">value_vars</span><span class="o">=</span><span class="p">[(</span><span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;E&#39;</span><span class="p">)])</span>
<span class="go"> (A, D) variable_0 variable_1 value</span>
<span class="go">0 a B E 1</span>
<span class="go">1 b B E 3</span>
<span class="go">2 c B E 5</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.value_counts">
<code class="descname">value_counts</code><span class="sig-paren">(</span><em>subset=None</em>, <em>sort=False</em>, <em>normalize=False</em>, <em>ascending=False</em>, <em>dropna=True</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.value_counts"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.value_counts" title="Permalink to this definition"></a></dt>
<dd><p>Return a Series containing counts of unique rows in the DataFrame.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>subset</strong> (<em>list-like</em><em>, </em><em>optional</em>) – Columns to use when counting unique combinations.</li>
<li><strong>normalize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Return proportions rather than frequencies.</li>
<li><strong>sort</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Sort by frequencies.</li>
<li><strong>ascending</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Sort in ascending order.</li>
<li><strong>dropna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – <p>Don’t include counts of rows that contain NA values.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.3.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Counts of unique rows in the DeferredDataFrame.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">sort</span></code> is <code class="docutils literal notranslate"><span class="pre">False</span></code> by default, and <code class="docutils literal notranslate"><span class="pre">sort=True</span></code> is not supported
because it imposes an ordering on the dataset which likely will not be
preserved.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.value_counts" title="apache_beam.dataframe.frames.DeferredSeries.value_counts"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.value_counts()</span></code></a></dt>
<dd>Equivalent method on DeferredSeries.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>The returned DeferredSeries will have a MultiIndex with one level per input
column. By default, rows that contain any NA values are omitted from
the result. Unlike pandas, the resulting DeferredSeries is not sorted by
frequency by default, because <code class="docutils literal notranslate"><span class="pre">sort</span></code> defaults to <code class="docutils literal notranslate"><span class="pre">False</span></code>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;num_legs&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;num_wings&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="s1">&#39;cat&#39;</span><span class="p">,</span> <span class="s1">&#39;ant&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> num_legs num_wings</span>
<span class="go">falcon 2 2</span>
<span class="go">dog 4 0</span>
<span class="go">cat 4 0</span>
<span class="go">ant 6 0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">value_counts</span><span class="p">()</span>
<span class="go">num_legs num_wings</span>
<span class="go">4 0 2</span>
<span class="go">2 2 1</span>
<span class="go">6 0 1</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">value_counts</span><span class="p">(</span><span class="n">sort</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">num_legs num_wings</span>
<span class="go">2 2 1</span>
<span class="go">4 0 2</span>
<span class="go">6 0 1</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">value_counts</span><span class="p">(</span><span class="n">ascending</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go">num_legs num_wings</span>
<span class="go">2 2 1</span>
<span class="go">6 0 1</span>
<span class="go">4 0 2</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">value_counts</span><span class="p">(</span><span class="n">normalize</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go">num_legs num_wings</span>
<span class="go">4 0 0.50</span>
<span class="go">2 2 0.25</span>
<span class="go">6 0 0.25</span>
<span class="go">dtype: float64</span>
<span class="go">With `dropna` set to `False` we can also count rows with NA values.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;first_name&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;John&#39;</span><span class="p">,</span> <span class="s1">&#39;Anne&#39;</span><span class="p">,</span> <span class="s1">&#39;John&#39;</span><span class="p">,</span> <span class="s1">&#39;Beth&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;middle_name&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;Smith&#39;</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">NA</span><span class="p">,</span> <span class="n">pd</span><span class="o">.</span><span class="n">NA</span><span class="p">,</span> <span class="s1">&#39;Louise&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> first_name middle_name</span>
<span class="go">0 John Smith</span>
<span class="go">1 Anne &lt;NA&gt;</span>
<span class="go">2 John &lt;NA&gt;</span>
<span class="go">3 Beth Louise</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">value_counts</span><span class="p">()</span>
<span class="go">first_name middle_name</span>
<span class="go">Beth Louise 1</span>
<span class="go">John Smith 1</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">value_counts</span><span class="p">(</span><span class="n">dropna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">first_name middle_name</span>
<span class="go">Anne NaN 1</span>
<span class="go">Beth Louise 1</span>
<span class="go">John Smith 1</span>
<span class="go"> NaN 1</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.compare">
<code class="descname">compare</code><span class="sig-paren">(</span><em>other</em>, <em>align_axis</em>, <em>keep_shape</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.compare"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.compare" title="Permalink to this definition"></a></dt>
<dd><p>Compare to another DataFrame and show the differences.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Object to compare with.</li>
<li><strong>align_axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 1</em>) – <p>Determine which axis to align the comparison on.</p>
<ul>
<li><dl class="first docutils">
<dt>0, or ‘index’ <span class="classifier-delimiter">:</span> <span class="classifier">Resulting differences are stacked vertically</span></dt>
<dd>with rows drawn alternately from self and other.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>1, or ‘columns’ <span class="classifier-delimiter">:</span> <span class="classifier">Resulting differences are aligned horizontally</span></dt>
<dd>with columns drawn alternately from self and other.</dd>
</dl>
</li>
</ul>
</li>
<li><strong>keep_shape</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If true, all rows and columns are kept.
Otherwise, only the ones with different values are kept.</li>
<li><strong>keep_equal</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If true, the result keeps values that are equal.
Otherwise, equal values are shown as NaNs.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredDataFrame that shows the differences stacked side by side.</p>
<p>The resulting index will be a MultiIndex with ‘self’ and ‘other’
stacked alternately at the inner level.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> – When the two DeferredDataFrames don’t have identical labels or shape.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>The default values <code class="docutils literal notranslate"><span class="pre">align_axis=1</span></code> and <code class="docutils literal notranslate"><span class="pre">keep_shape=False</span></code>
are not supported, because the output columns depend on the data.
To use <code class="docutils literal notranslate"><span class="pre">align_axis=1</span></code>, please specify <code class="docutils literal notranslate"><span class="pre">keep_shape=True</span></code>.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.compare" title="apache_beam.dataframe.frames.DeferredSeries.compare"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.compare()</span></code></a></dt>
<dd>Compare with another DeferredSeries and show differences.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.equals" title="apache_beam.dataframe.frames.DeferredDataFrame.equals"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.equals()</span></code></a></dt>
<dd>Test whether two objects contain the same elements.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Matching NaNs will not appear as a difference.</p>
<p>Can only compare identically-labeled
(i.e. same shape, identical row and column labels) DeferredDataFrames.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span>
<span class="gp">... </span> <span class="p">{</span>
<span class="gp">... </span> <span class="s2">&quot;col1&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="s2">&quot;a&quot;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;col2&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">2.0</span><span class="p">,</span> <span class="mf">3.0</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mf">5.0</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;col3&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mf">1.0</span><span class="p">,</span> <span class="mf">2.0</span><span class="p">,</span> <span class="mf">3.0</span><span class="p">,</span> <span class="mf">4.0</span><span class="p">,</span> <span class="mf">5.0</span><span class="p">]</span>
<span class="gp">... </span> <span class="p">},</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;col1&quot;</span><span class="p">,</span> <span class="s2">&quot;col2&quot;</span><span class="p">,</span> <span class="s2">&quot;col3&quot;</span><span class="p">],</span>
<span class="gp">... </span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> col1 col2 col3</span>
<span class="go">0 a 1.0 1.0</span>
<span class="go">1 a 2.0 2.0</span>
<span class="go">2 b 3.0 3.0</span>
<span class="go">3 b NaN 4.0</span>
<span class="go">4 a 5.0 5.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="s1">&#39;col1&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;c&#39;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;col3&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="mf">4.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span>
<span class="go"> col1 col2 col3</span>
<span class="go">0 c 1.0 1.0</span>
<span class="go">1 a 2.0 2.0</span>
<span class="go">2 b 3.0 4.0</span>
<span class="go">3 b NaN 4.0</span>
<span class="go">4 a 5.0 5.0</span>
<span class="go">Align the differences on columns</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">compare</span><span class="p">(</span><span class="n">df2</span><span class="p">)</span>
<span class="go"> col1 col3</span>
<span class="go"> self other self other</span>
<span class="go">0 a c NaN NaN</span>
<span class="go">2 NaN NaN 3.0 4.0</span>
<span class="go">Stack the differences on rows</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">compare</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">align_axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> col1 col3</span>
<span class="go">0 self a NaN</span>
<span class="go"> other c NaN</span>
<span class="go">2 self NaN 3.0</span>
<span class="go"> other NaN 4.0</span>
<span class="go">Keep the equal values</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">compare</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">keep_equal</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> col1 col3</span>
<span class="go"> self other self other</span>
<span class="go">0 a c 1.0 1.0</span>
<span class="go">2 b b 3.0 4.0</span>
<span class="go">Keep all original rows and columns</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">compare</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">keep_shape</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> col1 col2 col3</span>
<span class="go"> self other self other self other</span>
<span class="go">0 a c NaN NaN NaN NaN</span>
<span class="go">1 NaN NaN NaN NaN NaN NaN</span>
<span class="go">2 NaN NaN NaN NaN 3.0 4.0</span>
<span class="go">3 NaN NaN NaN NaN NaN NaN</span>
<span class="go">4 NaN NaN NaN NaN NaN NaN</span>
<span class="go">Keep all original rows and columns and also all original values</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">compare</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">keep_shape</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">keep_equal</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> col1 col2 col3</span>
<span class="go"> self other self other self other</span>
<span class="go">0 a c 1.0 1.0 1.0 1.0</span>
<span class="go">1 a a 2.0 2.0 2.0 2.0</span>
<span class="go">2 b b 3.0 3.0 3.0 4.0</span>
<span class="go">3 b b NaN NaN 4.0 4.0</span>
<span class="go">4 a a 5.0 5.0 5.0 5.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.idxmin">
<code class="descname">idxmin</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.idxmin"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmin" title="Permalink to this definition"></a></dt>
<dd><p>Return index of first occurrence of minimum over requested axis.</p>
<p>NA/null values are excluded.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – The axis to use. 0 or ‘index’ for row-wise, 1 or ‘columns’ for column-wise.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If an entire row/column is NA, the result
will be NA.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Indexes of minima along the specified axis.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> – If the row/column is empty.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmin" title="apache_beam.dataframe.frames.DeferredSeries.idxmin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmin()</span></code></a></dt>
<dd>Return index of the minimum element.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>This method is the DeferredDataFrame version of <code class="docutils literal notranslate"><span class="pre">ndarray.argmin</span></code>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Consider a dataset containing food consumption in Argentina.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;consumption&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mf">10.51</span><span class="p">,</span> <span class="mf">103.11</span><span class="p">,</span> <span class="mf">55.48</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;co2_emissions&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mf">37.2</span><span class="p">,</span> <span class="mf">19.66</span><span class="p">,</span> <span class="mi">1712</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;Pork&#39;</span><span class="p">,</span> <span class="s1">&#39;Wheat Products&#39;</span><span class="p">,</span> <span class="s1">&#39;Beef&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> consumption co2_emissions</span>
<span class="go">Pork 10.51 37.20</span>
<span class="go">Wheat Products 103.11 19.66</span>
<span class="go">Beef 55.48 1712.00</span>
<span class="go">By default, it returns the index for the minimum value in each column.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">idxmin</span><span class="p">()</span>
<span class="go">consumption Pork</span>
<span class="go">co2_emissions Wheat Products</span>
<span class="go">dtype: object</span>
<span class="go">To return the index for the minimum value in each row, use ``axis=&quot;columns&quot;``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">idxmin</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s2">&quot;columns&quot;</span><span class="p">)</span>
<span class="go">Pork consumption</span>
<span class="go">Wheat Products co2_emissions</span>
<span class="go">Beef consumption</span>
<span class="go">dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.idxmax">
<code class="descname">idxmax</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="reference internal" href="_modules/apache_beam/dataframe/frames.html#DeferredDataFrame.idxmax"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.idxmax" title="Permalink to this definition"></a></dt>
<dd><p>Return index of first occurrence of maximum over requested axis.</p>
<p>NA/null values are excluded.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – The axis to use. 0 or ‘index’ for row-wise, 1 or ‘columns’ for column-wise.</li>
<li><strong>skipna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Exclude NA/null values. If an entire row/column is NA, the result
will be NA.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Indexes of maxima along the specified axis.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> – If the row/column is empty.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.idxmax" title="apache_beam.dataframe.frames.DeferredSeries.idxmax"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.idxmax()</span></code></a></dt>
<dd>Return index of the maximum element.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>This method is the DeferredDataFrame version of <code class="docutils literal notranslate"><span class="pre">ndarray.argmax</span></code>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Consider a dataset containing food consumption in Argentina.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;consumption&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mf">10.51</span><span class="p">,</span> <span class="mf">103.11</span><span class="p">,</span> <span class="mf">55.48</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;co2_emissions&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mf">37.2</span><span class="p">,</span> <span class="mf">19.66</span><span class="p">,</span> <span class="mi">1712</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;Pork&#39;</span><span class="p">,</span> <span class="s1">&#39;Wheat Products&#39;</span><span class="p">,</span> <span class="s1">&#39;Beef&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> consumption co2_emissions</span>
<span class="go">Pork 10.51 37.20</span>
<span class="go">Wheat Products 103.11 19.66</span>
<span class="go">Beef 55.48 1712.00</span>
<span class="go">By default, it returns the index for the maximum value in each column.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">idxmax</span><span class="p">()</span>
<span class="go">consumption Wheat Products</span>
<span class="go">co2_emissions Beef</span>
<span class="go">dtype: object</span>
<span class="go">To return the index for the maximum value in each row, use ``axis=&quot;columns&quot;``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">idxmax</span><span class="p">(</span><span class="n">axis</span><span class="o">=</span><span class="s2">&quot;columns&quot;</span><span class="p">)</span>
<span class="go">Pork co2_emissions</span>
<span class="go">Wheat Products consumption</span>
<span class="go">Beef co2_emissions</span>
<span class="go">dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.abs">
<code class="descname">abs</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.abs" title="Permalink to this definition"></a></dt>
<dd><p>Return a Series/DataFrame with absolute numeric value of each element.</p>
<p>This function only applies to elements that are all numeric.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">DeferredSeries/DeferredDataFrame containing the absolute value of each element.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">DeferredSeries or DeferredDataFrame</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.absolute()</span></code></dt>
<dd>Calculate the absolute value element-wise.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>For <code class="docutils literal notranslate"><span class="pre">complex</span></code> inputs, <code class="docutils literal notranslate"><span class="pre">1.2</span> <span class="pre">+</span> <span class="pre">1j</span></code>, the absolute value is
<span class="math notranslate nohighlight">\(\sqrt{ a^2 + b^2 }\)</span>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Absolute numeric values in a Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="o">-</span><span class="mf">1.10</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="o">-</span><span class="mf">3.33</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">abs</span><span class="p">()</span>
<span class="go">0 1.10</span>
<span class="go">1 2.00</span>
<span class="go">2 3.33</span>
<span class="go">3 4.00</span>
<span class="go">dtype: float64</span>
<span class="go">Absolute numeric values in a Series with complex numbers.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mf">1.2</span> <span class="o">+</span> <span class="mi">1</span><span class="n">j</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">abs</span><span class="p">()</span>
<span class="go">0 1.56205</span>
<span class="go">dtype: float64</span>
<span class="go">Absolute numeric values in a Series with a Timedelta element.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="n">pd</span><span class="o">.</span><span class="n">Timedelta</span><span class="p">(</span><span class="s1">&#39;1 days&#39;</span><span class="p">)])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">abs</span><span class="p">()</span>
<span class="go">0 1 days</span>
<span class="go">dtype: timedelta64[ns]</span>
<span class="go">Select rows with data closest to certain value using argsort (from</span>
<span class="go">`StackOverflow &lt;https://stackoverflow.com/a/17758115&gt;`__).</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span>
<span class="gp">... </span> <span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">10</span><span class="p">,</span> <span class="mi">20</span><span class="p">,</span> <span class="mi">30</span><span class="p">,</span> <span class="mi">40</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">50</span><span class="p">,</span> <span class="o">-</span><span class="mi">30</span><span class="p">,</span> <span class="o">-</span><span class="mi">50</span><span class="p">]</span>
<span class="gp">... </span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> a b c</span>
<span class="go">0 4 10 100</span>
<span class="go">1 5 20 50</span>
<span class="go">2 6 30 -30</span>
<span class="go">3 7 40 -50</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[(</span><span class="n">df</span><span class="o">.</span><span class="n">c</span> <span class="o">-</span> <span class="mi">43</span><span class="p">)</span><span class="o">.</span><span class="n">abs</span><span class="p">()</span><span class="o">.</span><span class="n">argsort</span><span class="p">()]</span>
<span class="go"> a b c</span>
<span class="go">1 5 20 50</span>
<span class="go">0 4 10 100</span>
<span class="go">2 6 30 -30</span>
<span class="go">3 7 40 -50</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.add">
<code class="descname">add</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="Permalink to this definition"></a></dt>
<dd><p>Get addition of dataframe and other, element-wise (binary operator <cite>add</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">dataframe</span> <span class="pre">+</span> <span class="pre">other</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. With reverse version, <cite>radd</cite>.</p>
<p>Among flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) to
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with the operator version, which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.apply">
<code class="descname">apply</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.apply" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.apply.html#pandas.DataFrame.apply" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.apply()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘apply’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.asfreq">
<code class="descname">asfreq</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.asfreq" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.asfreq.html#pandas.DataFrame.asfreq" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.asfreq()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘asfreq’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.astype">
<code class="descname">astype</code><span class="sig-paren">(</span><em>dtype</em>, <em>copy</em>, <em>errors</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.astype" title="Permalink to this definition"></a></dt>
<dd><p>Cast a pandas object to a specified dtype <code class="docutils literal notranslate"><span class="pre">dtype</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>dtype</strong> (<em>data type</em><em>, or </em><em>dict of column name -&gt; data type</em>) – Use a numpy.dtype or Python type to cast entire pandas object to
the same type. Alternatively, use {col: dtype, …}, where col is a
column label and dtype is a numpy.dtype or Python type to cast one
or more of the DeferredDataFrame’s columns to column-specific types.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Return a copy when <code class="docutils literal notranslate"><span class="pre">copy=True</span></code> (be very careful setting
<code class="docutils literal notranslate"><span class="pre">copy=False</span></code> as changes to values then may propagate to other
pandas objects).</li>
<li><strong>errors</strong> (<em>{'raise'</em><em>, </em><em>'ignore'}</em><em>, </em><em>default 'raise'</em>) – <p>Control raising of exceptions on invalid data for provided dtype.</p>
<ul>
<li><code class="docutils literal notranslate"><span class="pre">raise</span></code> : allow exceptions to be raised</li>
<li><code class="docutils literal notranslate"><span class="pre">ignore</span></code> : suppress exceptions. On error return original object.</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><strong>casted</strong></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">same type as caller</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>astype is not parallelizable when <code class="docutils literal notranslate"><span class="pre">errors=&quot;ignore&quot;</span></code> is specified.</p>
<p><code class="docutils literal notranslate"><span class="pre">copy=False</span></code> is not supported because it relies on memory-sharing
semantics.</p>
<p><code class="docutils literal notranslate"><span class="pre">dtype=&quot;category&quot;</span></code> is not supported because the type of the output column
depends on the data. Please use <code class="docutils literal notranslate"><span class="pre">pd.CategoricalDtype</span></code> with explicit
categories instead.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_datetime()</span></code></dt>
<dd>Convert argument to datetime.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_timedelta()</span></code></dt>
<dd>Convert argument to timedelta.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_numeric()</span></code></dt>
<dd>Convert argument to a numeric type.</dd>
<dt><a class="reference external" href="https://numpy.org/doc/stable/reference/generated/numpy.ndarray.astype.html#numpy.ndarray.astype" title="(in NumPy v1.22)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.ndarray.astype()</span></code></a></dt>
<dd>Cast a numpy array to a specified type.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<div class="deprecated">
<p><span class="versionmodified">Deprecated since version 1.3.0: </span>Using <code class="docutils literal notranslate"><span class="pre">astype</span></code> to convert from timezone-naive dtype to
timezone-aware dtype is deprecated and will raise in a
future version. Use <code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.dt.tz_localize()</span></code> instead.</p>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Create a DataFrame:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">d</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]}</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="n">d</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dtypes</span>
<span class="go">col1 int64</span>
<span class="go">col2 int64</span>
<span class="go">dtype: object</span>
<span class="go">Cast all columns to int32:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s1">&#39;int32&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">dtypes</span>
<span class="go">col1 int32</span>
<span class="go">col2 int32</span>
<span class="go">dtype: object</span>
<span class="go">Cast col1 to int32 using a dictionary:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">astype</span><span class="p">({</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="s1">&#39;int32&#39;</span><span class="p">})</span><span class="o">.</span><span class="n">dtypes</span>
<span class="go">col1 int32</span>
<span class="go">col2 int64</span>
<span class="go">dtype: object</span>
<span class="go">Create a series:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">dtype</span><span class="o">=</span><span class="s1">&#39;int32&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">dtype: int32</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s1">&#39;int64&#39;</span><span class="p">)</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">dtype: int64</span>
<span class="go">Convert to categorical type:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s1">&#39;category&#39;</span><span class="p">)</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">dtype: category</span>
<span class="go">Categories (2, int64): [1, 2]</span>
<span class="go">Convert to ordered categorical type with custom ordering:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">pandas.api.types</span> <span class="kn">import</span> <span class="n">CategoricalDtype</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">cat_dtype</span> <span class="o">=</span> <span class="n">CategoricalDtype</span><span class="p">(</span>
<span class="gp">... </span> <span class="n">categories</span><span class="o">=</span><span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">ordered</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">cat_dtype</span><span class="p">)</span>
<span class="go">0 1</span>
<span class="go">1 2</span>
<span class="go">dtype: category</span>
<span class="go">Categories (2, int64): [2 &lt; 1]</span>
<span class="go">Note that using ``copy=False`` and changing data on a new</span>
<span class="go">pandas object may propagate changes:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s2</span> <span class="o">=</span> <span class="n">s1</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="s1">&#39;int64&#39;</span><span class="p">,</span> <span class="n">copy</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s2</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="mi">10</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s1</span> <span class="c1"># note that s1[0] has changed too</span>
<span class="go">0 10</span>
<span class="go">1 2</span>
<span class="go">dtype: int64</span>
<span class="go">Create a series of dates:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser_date</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s1">&#39;20200101&#39;</span><span class="p">,</span> <span class="n">periods</span><span class="o">=</span><span class="mi">3</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ser_date</span>
<span class="go">0 2020-01-01</span>
<span class="go">1 2020-01-02</span>
<span class="go">2 2020-01-03</span>
<span class="go">dtype: datetime64[ns]</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.at">
<code class="descname">at</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.at" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.at()</span></code> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘at’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.at_time">
<code class="descname">at_time</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.at_time" title="Permalink to this definition"></a></dt>
<dd><p>Select values at particular time of day (e.g., 9:30AM).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>time</strong> (<a class="reference external" href="https://docs.python.org/3/library/datetime.html#datetime.time" title="(in Python v3.10)"><em>datetime.time</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – </li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – </li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> – If the index is not a <code class="xref py py-class docutils literal notranslate"><span class="pre">DatetimeIndex</span></code></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.between_time" title="apache_beam.dataframe.frames.DeferredDataFrame.between_time"><code class="xref py py-meth docutils literal notranslate"><span class="pre">between_time()</span></code></a></dt>
<dd>Select values between particular times of the day.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.first" title="apache_beam.dataframe.frames.DeferredDataFrame.first"><code class="xref py py-meth docutils literal notranslate"><span class="pre">first()</span></code></a></dt>
<dd>Select initial periods of time series based on a date offset.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.last" title="apache_beam.dataframe.frames.DeferredDataFrame.last"><code class="xref py py-meth docutils literal notranslate"><span class="pre">last()</span></code></a></dt>
<dd>Select final periods of time series based on a date offset.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DatetimeIndex.indexer_at_time()</span></code></dt>
<dd>Get just the index locations for values at particular time of the day.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">i</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s1">&#39;2018-04-09&#39;</span><span class="p">,</span> <span class="n">periods</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;12H&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span> <span class="n">index</span><span class="o">=</span><span class="n">i</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span>
<span class="go"> A</span>
<span class="go">2018-04-09 00:00:00 1</span>
<span class="go">2018-04-09 12:00:00 2</span>
<span class="go">2018-04-10 00:00:00 3</span>
<span class="go">2018-04-10 12:00:00 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span><span class="o">.</span><span class="n">at_time</span><span class="p">(</span><span class="s1">&#39;12:00&#39;</span><span class="p">)</span>
<span class="go"> A</span>
<span class="go">2018-04-09 12:00:00 2</span>
<span class="go">2018-04-10 12:00:00 4</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.attrs">
<code class="descname">attrs</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.attrs" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.attrs()</span></code> is not yet supported in the Beam DataFrame API because it is experimental in pandas.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.backfill">
<code class="descname">backfill</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.backfill" title="Permalink to this definition"></a></dt>
<dd><p>Synonym for <code class="xref py py-meth docutils literal notranslate"><span class="pre">DataFrame.fillna()</span></code> with <code class="docutils literal notranslate"><span class="pre">method='bfill'</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Object with missing values filled or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">DeferredSeries/DeferredDataFrame or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>backfill is only supported for axis="columns". axis="index" is order-sensitive.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.between_time">
<code class="descname">between_time</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.between_time" title="Permalink to this definition"></a></dt>
<dd><p>Select values between particular times of the day (e.g., 9:00-9:30 AM).</p>
<p>By setting <code class="docutils literal notranslate"><span class="pre">start_time</span></code> to be later than <code class="docutils literal notranslate"><span class="pre">end_time</span></code>,
you can get the times that are <em>not</em> between the two times.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>start_time</strong> (<a class="reference external" href="https://docs.python.org/3/library/datetime.html#datetime.time" title="(in Python v3.10)"><em>datetime.time</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – Initial time as a time filter limit.</li>
<li><strong>end_time</strong> (<a class="reference external" href="https://docs.python.org/3/library/datetime.html#datetime.time" title="(in Python v3.10)"><em>datetime.time</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – End time as a time filter limit.</li>
<li><strong>include_start</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Whether the start time needs to be included in the result.</li>
<li><strong>include_end</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Whether the end time needs to be included in the result.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – Determine range time on index or columns value.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Data from the original object filtered to the specified time range.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> – If the index is not a <code class="xref py py-class docutils literal notranslate"><span class="pre">DatetimeIndex</span></code></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.at_time" title="apache_beam.dataframe.frames.DeferredDataFrame.at_time"><code class="xref py py-meth docutils literal notranslate"><span class="pre">at_time()</span></code></a></dt>
<dd>Select values at a particular time of the day.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.first" title="apache_beam.dataframe.frames.DeferredDataFrame.first"><code class="xref py py-meth docutils literal notranslate"><span class="pre">first()</span></code></a></dt>
<dd>Select initial periods of time series based on a date offset.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.last" title="apache_beam.dataframe.frames.DeferredDataFrame.last"><code class="xref py py-meth docutils literal notranslate"><span class="pre">last()</span></code></a></dt>
<dd>Select final periods of time series based on a date offset.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DatetimeIndex.indexer_between_time()</span></code></dt>
<dd>Get just the index locations for values between particular times of the day.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">i</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s1">&#39;2018-04-09&#39;</span><span class="p">,</span> <span class="n">periods</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;1D20min&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span> <span class="n">index</span><span class="o">=</span><span class="n">i</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span>
<span class="go"> A</span>
<span class="go">2018-04-09 00:00:00 1</span>
<span class="go">2018-04-10 00:20:00 2</span>
<span class="go">2018-04-11 00:40:00 3</span>
<span class="go">2018-04-12 01:00:00 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span><span class="o">.</span><span class="n">between_time</span><span class="p">(</span><span class="s1">&#39;0:15&#39;</span><span class="p">,</span> <span class="s1">&#39;0:45&#39;</span><span class="p">)</span>
<span class="go"> A</span>
<span class="go">2018-04-10 00:20:00 2</span>
<span class="go">2018-04-11 00:40:00 3</span>
<span class="go">You get the times that are *not* between two times by setting</span>
<span class="go">``start_time`` later than ``end_time``:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span><span class="o">.</span><span class="n">between_time</span><span class="p">(</span><span class="s1">&#39;0:45&#39;</span><span class="p">,</span> <span class="s1">&#39;0:15&#39;</span><span class="p">)</span>
<span class="go"> A</span>
<span class="go">2018-04-09 00:00:00 1</span>
<span class="go">2018-04-12 01:00:00 4</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.bfill">
<code class="descname">bfill</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.bfill" title="Permalink to this definition"></a></dt>
<dd><p>bfill is only supported for axis="columns". axis="index" is order-sensitive.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.bool">
<code class="descname">bool</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.bool" title="Permalink to this definition"></a></dt>
<dd><p>Return the bool of a single element Series or DataFrame.</p>
<p>This must be a boolean scalar value, either True or False. It will raise a
ValueError if the Series or DataFrame does not have exactly 1 element, or that
element is not boolean (integer values 0 and 1 will also raise an exception).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">The value in the DeferredSeries or DeferredDataFrame.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)">bool</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.astype" title="apache_beam.dataframe.frames.DeferredSeries.astype"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.astype()</span></code></a></dt>
<dd>Change the data type of a DeferredSeries, including to boolean.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.astype" title="apache_beam.dataframe.frames.DeferredDataFrame.astype"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.astype()</span></code></a></dt>
<dd>Change the data type of a DeferredDataFrame, including to boolean.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.bool_()</span></code></dt>
<dd>NumPy boolean data type, used by pandas for boolean values.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">The method will only work for single element objects with a boolean value:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">True</span><span class="p">])</span><span class="o">.</span><span class="n">bool</span><span class="p">()</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">False</span><span class="p">])</span><span class="o">.</span><span class="n">bool</span><span class="p">()</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;col&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">True</span><span class="p">]})</span><span class="o">.</span><span class="n">bool</span><span class="p">()</span>
<span class="go">True</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;col&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">False</span><span class="p">]})</span><span class="o">.</span><span class="n">bool</span><span class="p">()</span>
<span class="go">False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.boxplot">
<code class="descname">boxplot</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.boxplot" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.boxplot.html#pandas.DataFrame.boxplot" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.boxplot()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘boxplot’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.combine">
<code class="descname">combine</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.combine" title="Permalink to this definition"></a></dt>
<dd><p>Perform column-wise combine with another DataFrame.</p>
<p>Combines a DataFrame with <cite>other</cite> DataFrame using <cite>func</cite>
to element-wise combine columns. The row and column indexes of the
resulting DataFrame will be the union of the two.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – The DeferredDataFrame to merge column-wise.</li>
<li><strong>func</strong> (<em>function</em>) – Function that takes two series as inputs and returns a DeferredSeries or a
scalar. Used to merge the two dataframes column by column.</li>
<li><strong>fill_value</strong> (<em>scalar value</em><em>, </em><em>default None</em>) – The value to fill NaNs with prior to passing any column to the
merge func.</li>
<li><strong>overwrite</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – If True, columns in <cite>self</cite> that do not exist in <cite>other</cite> will be
overwritten with NaNs.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Combination of the provided DeferredDataFrames.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.combine_first" title="apache_beam.dataframe.frames.DeferredDataFrame.combine_first"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.combine_first()</span></code></a></dt>
<dd>Combine two DeferredDataFrame objects and default to non-null values in frame calling the method.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Combine using a simple function that chooses the smaller column.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">take_smaller</span> <span class="o">=</span> <span class="k">lambda</span> <span class="n">s1</span><span class="p">,</span> <span class="n">s2</span><span class="p">:</span> <span class="n">s1</span> <span class="k">if</span> <span class="n">s1</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="o">&lt;</span> <span class="n">s2</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span> <span class="k">else</span> <span class="n">s2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 0 3</span>
<span class="go">1 0 3</span>
<span class="go">Example using a true element-wise combine function.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">minimum</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 1 2</span>
<span class="go">1 0 3</span>
<span class="go">Using `fill_value` fills Nones prior to passing the column to the</span>
<span class="go">merge function.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=-</span><span class="mi">5</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 0 -5.0</span>
<span class="go">1 0 4.0</span>
<span class="go">However, if the same element in both dataframes is None, that None</span>
<span class="go">is preserved</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mi">3</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=-</span><span class="mi">5</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 0 -5.0</span>
<span class="go">1 0 3.0</span>
<span class="go">Example that demonstrates the use of `overwrite` and behavior when</span>
<span class="go">the axis differ between the dataframes.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">[</span><span class="o">-</span><span class="mi">10</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">},</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 NaN NaN NaN</span>
<span class="go">1 NaN 3.0 -10.0</span>
<span class="go">2 NaN 3.0 1.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df2</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">,</span> <span class="n">overwrite</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 0.0 NaN NaN</span>
<span class="go">1 0.0 3.0 -10.0</span>
<span class="go">2 NaN 3.0 1.0</span>
<span class="go">Demonstrating the preference of the passed in dataframe.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">},</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df1</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 0.0 NaN NaN</span>
<span class="go">1 0.0 3.0 NaN</span>
<span class="go">2 NaN 3.0 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span><span class="o">.</span><span class="n">combine</span><span class="p">(</span><span class="n">df1</span><span class="p">,</span> <span class="n">take_smaller</span><span class="p">,</span> <span class="n">overwrite</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 0.0 NaN NaN</span>
<span class="go">1 0.0 3.0 1.0</span>
<span class="go">2 NaN 3.0 1.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.combine_first">
<code class="descname">combine_first</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.combine_first" title="Permalink to this definition"></a></dt>
<dd><p>Update null elements with value in the same location in <cite>other</cite>.</p>
<p>Combine two DataFrame objects by filling null values in one DataFrame
with non-null values from other DataFrame. The row and column indexes
of the resulting DataFrame will be the union of the two.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Provided DeferredDataFrame to use to fill null values.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">The result of combining the provided DeferredDataFrame with the other object.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.combine" title="apache_beam.dataframe.frames.DeferredDataFrame.combine"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.combine()</span></code></a></dt>
<dd>Perform series-wise operation on two DeferredDataFrames using a given function.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine_first</span><span class="p">(</span><span class="n">df2</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 1.0 3.0</span>
<span class="go">1 0.0 4.0</span>
<span class="go">Null values still persist if the location of that null value</span>
<span class="go">does not exist in `other`</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="kc">None</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]},</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">combine_first</span><span class="p">(</span><span class="n">df2</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 NaN 4.0 NaN</span>
<span class="go">1 0.0 3.0 1.0</span>
<span class="go">2 NaN 3.0 1.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.convert_dtypes">
<code class="descname">convert_dtypes</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.convert_dtypes" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.convert_dtypes.html#pandas.DataFrame.convert_dtypes" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.convert_dtypes()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘convert_dtypes’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.copy">
<code class="descname">copy</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.copy" title="Permalink to this definition"></a></dt>
<dd><p>Make a copy of this object’s indices and data.</p>
<p>When <code class="docutils literal notranslate"><span class="pre">deep=True</span></code> (default), a new object will be created with a
copy of the calling object’s data and indices. Modifications to
the data or indices of the copy will not be reflected in the
original object (see notes below).</p>
<p>When <code class="docutils literal notranslate"><span class="pre">deep=False</span></code>, a new object will be created without copying
the calling object’s data or index (only references to the data
and index are copied). Any changes to the data of the original
will be reflected in the shallow copy (and vice versa).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>deep</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Make a deep copy, including a copy of the data and the indices.
With <code class="docutils literal notranslate"><span class="pre">deep=False</span></code> neither the indices nor the data are copied.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><strong>copy</strong> – Object type matches caller.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Notes</p>
<p>When <code class="docutils literal notranslate"><span class="pre">deep=True</span></code>, data is copied but actual Python objects
will not be copied recursively, only the reference to the object.
This is in contrast to <cite>copy.deepcopy</cite> in the Standard Library,
which recursively copies object data (see examples below).</p>
<p>While <code class="docutils literal notranslate"><span class="pre">Index</span></code> objects are copied when <code class="docutils literal notranslate"><span class="pre">deep=True</span></code>, the underlying
numpy array is not copied for performance reasons. Since <code class="docutils literal notranslate"><span class="pre">Index</span></code> is
immutable, the underlying data can be safely shared and a copy
is not needed.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">a 1</span>
<span class="go">b 2</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s_copy</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s_copy</span>
<span class="go">a 1</span>
<span class="go">b 2</span>
<span class="go">dtype: int64</span>
<span class="go">Shallow copy versus default (deep) copy:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">deep</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">shallow</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">copy</span><span class="p">(</span><span class="n">deep</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">Shallow copy shares data and index with original.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="ow">is</span> <span class="n">shallow</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">values</span> <span class="ow">is</span> <span class="n">shallow</span><span class="o">.</span><span class="n">values</span> <span class="ow">and</span> <span class="n">s</span><span class="o">.</span><span class="n">index</span> <span class="ow">is</span> <span class="n">shallow</span><span class="o">.</span><span class="n">index</span>
<span class="go">True</span>
<span class="go">Deep copy has own copy of data and index.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="ow">is</span> <span class="n">deep</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">values</span> <span class="ow">is</span> <span class="n">deep</span><span class="o">.</span><span class="n">values</span> <span class="ow">or</span> <span class="n">s</span><span class="o">.</span><span class="n">index</span> <span class="ow">is</span> <span class="n">deep</span><span class="o">.</span><span class="n">index</span>
<span class="go">False</span>
<span class="go">Updates to the data shared by shallow copy and original are reflected</span>
<span class="go">in both; deep copy remains unchanged.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="mi">3</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">shallow</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="mi">4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">a 3</span>
<span class="go">b 4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">shallow</span>
<span class="go">a 3</span>
<span class="go">b 4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">deep</span>
<span class="go">a 1</span>
<span class="go">b 2</span>
<span class="go">dtype: int64</span>
<span class="go">Note that when copying an object containing Python objects, a deep copy</span>
<span class="go">will copy the data, but will not do so recursively. Updating a nested</span>
<span class="go">data object will be reflected in the deep copy.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">deep</span> <span class="o">=</span> <span class="n">s</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="p">[</span><span class="mi">0</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="mi">10</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 [10, 2]</span>
<span class="go">1 [3, 4]</span>
<span class="go">dtype: object</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">deep</span>
<span class="go">0 [10, 2]</span>
<span class="go">1 [3, 4]</span>
<span class="go">dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.div">
<code class="descname">div</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="Permalink to this definition"></a></dt>
<dd><p>Get floating division of dataframe and other, element-wise (binary operator <cite>truediv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">dataframe</span> <span class="pre">/</span> <span class="pre">other</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. With reverse version, <cite>rtruediv</cite>.</p>
<p>Among flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) to
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with the operator version, which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.divide">
<code class="descname">divide</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.divide" title="Permalink to this definition"></a></dt>
<dd><p>Get floating division of dataframe and other, element-wise (binary operator <cite>truediv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">dataframe</span> <span class="pre">/</span> <span class="pre">other</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. With reverse version, <cite>rtruediv</cite>.</p>
<p>Among flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) to
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with operator version which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.drop">
<code class="descname">drop</code><span class="sig-paren">(</span><em>labels</em>, <em>axis</em>, <em>index</em>, <em>columns</em>, <em>errors</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.drop" title="Permalink to this definition"></a></dt>
<dd><p>Drop specified labels from rows or columns.</p>
<p>Remove rows or columns by specifying label names and corresponding
axis, or by specifying directly index or column names. When using a
multi-index, labels on different levels can be removed by specifying
the level. See the <a class="reference external" href="https://pandas.pydata.org/docs/user_guide/advanced.html#advanced-shown-levels">pandas user guide</a>
for more information about the now unused levels.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>labels</strong> (<em>single label</em><em> or </em><em>list-like</em>) – Index or column labels to drop.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – Whether to drop labels from the index (0 or ‘index’) or
columns (1 or ‘columns’).</li>
<li><strong>index</strong> (<em>single label</em><em> or </em><em>list-like</em>) – Alternative to specifying axis (<code class="docutils literal notranslate"><span class="pre">labels,</span> <span class="pre">axis=0</span></code>
is equivalent to <code class="docutils literal notranslate"><span class="pre">index=labels</span></code>).</li>
<li><strong>columns</strong> (<em>single label</em><em> or </em><em>list-like</em>) – Alternative to specifying axis (<code class="docutils literal notranslate"><span class="pre">labels,</span> <span class="pre">axis=1</span></code>
is equivalent to <code class="docutils literal notranslate"><span class="pre">columns=labels</span></code>).</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em>, </em><em>optional</em>) – For MultiIndex, level from which the labels will be removed.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If False, return a copy. Otherwise, do operation
inplace and return None.</li>
<li><strong>errors</strong> (<em>{'ignore'</em><em>, </em><em>'raise'}</em><em>, </em><em>default 'raise'</em>) – If ‘ignore’, suppress error and only existing labels are
dropped.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredDataFrame without the removed index or column labels or
None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#KeyError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">KeyError</span></code></a> – If any of the labels is not found in the selected axis.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>drop is not parallelizable when dropping from the index and
<code class="docutils literal notranslate"><span class="pre">errors=&quot;raise&quot;</span></code> is specified. It requires collecting all data on a single
node in order to detect if one of the index values is missing.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.loc" title="apache_beam.dataframe.frames.DeferredDataFrame.loc"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.loc()</span></code></a></dt>
<dd>Label-location based indexer for selection by label.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.dropna" title="apache_beam.dataframe.frames.DeferredDataFrame.dropna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.dropna()</span></code></a></dt>
<dd>Return DeferredDataFrame with labels on given axis omitted where (all or any) data are missing.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.drop_duplicates" title="apache_beam.dataframe.frames.DeferredDataFrame.drop_duplicates"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.drop_duplicates()</span></code></a></dt>
<dd>Return DeferredDataFrame with duplicate rows removed, optionally only considering certain columns.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.drop" title="apache_beam.dataframe.frames.DeferredSeries.drop"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.drop()</span></code></a></dt>
<dd>Return DeferredSeries with specified index labels removed.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">arange</span><span class="p">(</span><span class="mi">12</span><span class="p">)</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;D&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B C D</span>
<span class="go">0 0 1 2 3</span>
<span class="go">1 4 5 6 7</span>
<span class="go">2 8 9 10 11</span>
<span class="go">Drop columns</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">([</span><span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> A D</span>
<span class="go">0 0 3</span>
<span class="go">1 4 7</span>
<span class="go">2 8 11</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">])</span>
<span class="go"> A D</span>
<span class="go">0 0 3</span>
<span class="go">1 4 7</span>
<span class="go">2 8 11</span>
<span class="go">Drop rows by index</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span>
<span class="go"> A B C D</span>
<span class="go">2 8 9 10 11</span>
<span class="go">Drop columns and/or rows of MultiIndex DataFrame</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">midx</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="p">(</span><span class="n">levels</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;lama&#39;</span><span class="p">,</span> <span class="s1">&#39;cow&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;speed&#39;</span><span class="p">,</span> <span class="s1">&#39;weight&#39;</span><span class="p">,</span> <span class="s1">&#39;length&#39;</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">codes</span><span class="o">=</span><span class="p">[[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="n">midx</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;big&#39;</span><span class="p">,</span> <span class="s1">&#39;small&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">data</span><span class="o">=</span><span class="p">[[</span><span class="mi">45</span><span class="p">,</span> <span class="mi">30</span><span class="p">],</span> <span class="p">[</span><span class="mi">200</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span> <span class="p">[</span><span class="mf">1.5</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">30</span><span class="p">,</span> <span class="mi">20</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">],</span> <span class="p">[</span><span class="mf">1.5</span><span class="p">,</span> <span class="mf">0.8</span><span class="p">],</span> <span class="p">[</span><span class="mi">320</span><span class="p">,</span> <span class="mi">250</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mf">0.8</span><span class="p">],</span> <span class="p">[</span><span class="mf">0.3</span><span class="p">,</span> <span class="mf">0.2</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> big small</span>
<span class="go">lama speed 45.0 30.0</span>
<span class="go"> weight 200.0 100.0</span>
<span class="go"> length 1.5 1.0</span>
<span class="go">cow speed 30.0 20.0</span>
<span class="go"> weight 250.0 150.0</span>
<span class="go"> length 1.5 0.8</span>
<span class="go">falcon speed 320.0 250.0</span>
<span class="go"> weight 1.0 0.8</span>
<span class="go"> length 0.3 0.2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="s1">&#39;cow&#39;</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="s1">&#39;small&#39;</span><span class="p">)</span>
<span class="go"> big</span>
<span class="go">lama speed 45.0</span>
<span class="go"> weight 200.0</span>
<span class="go"> length 1.5</span>
<span class="go">falcon speed 320.0</span>
<span class="go"> weight 1.0</span>
<span class="go"> length 0.3</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">drop</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="s1">&#39;length&#39;</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> big small</span>
<span class="go">lama speed 45.0 30.0</span>
<span class="go"> weight 200.0 100.0</span>
<span class="go">cow speed 30.0 20.0</span>
<span class="go"> weight 250.0 150.0</span>
<span class="go">falcon speed 320.0 250.0</span>
<span class="go"> weight 1.0 0.8</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.droplevel">
<code class="descname">droplevel</code><span class="sig-paren">(</span><em>level</em>, <em>axis</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.droplevel" title="Permalink to this definition"></a></dt>
<dd><p>Return DeferredSeries/DeferredDataFrame with requested index / column level(s) removed.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, or </em><em>list-like</em>) – If a string is given, it must be the name of a level.
If list-like, elements must be names or positional indexes
of levels.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – <p>Axis along which the level(s) is removed:</p>
<ul>
<li>0 or ‘index’: remove level(s) from the index (row labels).</li>
<li>1 or ‘columns’: remove level(s) from the columns (column labels).</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredSeries/DeferredDataFrame with requested index / column level(s) removed.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">DeferredSeries/DeferredDataFrame</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">9</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">11</span><span class="p">,</span> <span class="mi">12</span><span class="p">]</span>
<span class="gp">... </span><span class="p">])</span><span class="o">.</span><span class="n">set_index</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span><span class="o">.</span><span class="n">rename_axis</span><span class="p">([</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">columns</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_tuples</span><span class="p">([</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;f&#39;</span><span class="p">)</span>
<span class="gp">... </span><span class="p">],</span> <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;level_1&#39;</span><span class="p">,</span> <span class="s1">&#39;level_2&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go">level_1 c d</span>
<span class="go">level_2 e f</span>
<span class="go">a b</span>
<span class="go">1 2 3 4</span>
<span class="go">5 6 7 8</span>
<span class="go">9 10 11 12</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">droplevel</span><span class="p">(</span><span class="s1">&#39;a&#39;</span><span class="p">)</span>
<span class="go">level_1 c d</span>
<span class="go">level_2 e f</span>
<span class="go">b</span>
<span class="go">2 3 4</span>
<span class="go">6 7 8</span>
<span class="go">10 11 12</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">droplevel</span><span class="p">(</span><span class="s1">&#39;level_2&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">level_1 c d</span>
<span class="go">a b</span>
<span class="go">1 2 3 4</span>
<span class="go">5 6 7 8</span>
<span class="go">9 10 11 12</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.dtype">
<code class="descname">dtype</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.dtype" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.empty">
<code class="descname">empty</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.empty" title="Permalink to this definition"></a></dt>
<dd><p>Indicator whether DataFrame is empty.</p>
<p>True if DataFrame is entirely empty (no items), meaning any of the
axes are of length 0.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">True if the DeferredDataFrame is empty, False otherwise.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)">bool</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.dropna" title="apache_beam.dataframe.frames.DeferredSeries.dropna"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredSeries.dropna</span></code></a></dt>
<dd>Return series without null values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.dropna" title="apache_beam.dataframe.frames.DeferredDataFrame.dropna"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.dropna</span></code></a></dt>
<dd>Return DeferredDataFrame with labels on given axis omitted where (all or any) data are missing.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>If DeferredDataFrame contains only NaNs, it is still not considered empty. See
the example below.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">An example of an actual empty DataFrame. Notice the index is empty:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_empty</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span> <span class="p">:</span> <span class="p">[]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_empty</span>
<span class="go">Empty DataFrame</span>
<span class="go">Columns: [A]</span>
<span class="go">Index: []</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_empty</span><span class="o">.</span><span class="n">empty</span>
<span class="go">True</span>
<span class="go">If we only have NaNs in our DataFrame, it is not considered empty! We</span>
<span class="go">will need to drop the NaNs to make the DataFrame empty:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span> <span class="p">:</span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A</span>
<span class="go">0 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">empty</span>
<span class="go">False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">dropna</span><span class="p">()</span><span class="o">.</span><span class="n">empty</span>
<span class="go">True</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.eq">
<code class="descname">eq</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.eq" title="Permalink to this definition"></a></dt>
<dd><p>Get the “equal to” comparison of dataframe and other, element-wise (binary operator <cite>eq</cite>).</p>
<p>Among flexible wrappers (<cite>eq</cite>, <cite>ne</cite>, <cite>le</cite>, <cite>lt</cite>, <cite>ge</cite>, <cite>gt</cite>) to comparison
operators.</p>
<p>Equivalent to <cite>==</cite>, <cite>!=</cite>, <cite>&lt;=</cite>, <cite>&lt;</cite>, <cite>&gt;=</cite>, <cite>&gt;</cite> with support to choose axis
(rows or columns) and level for comparison.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 'columns'</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’).</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the passed
MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the comparison.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">DeferredDataFrame of bool</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eq" title="apache_beam.dataframe.frames.DeferredDataFrame.eq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.eq()</span></code></a></dt>
<dd>Compare DeferredDataFrames for equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.ne" title="apache_beam.dataframe.frames.DeferredDataFrame.ne"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.ne()</span></code></a></dt>
<dd>Compare DeferredDataFrames for inequality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.le" title="apache_beam.dataframe.frames.DeferredDataFrame.le"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.le()</span></code></a></dt>
<dd>Compare DeferredDataFrames for less than inequality or equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.lt" title="apache_beam.dataframe.frames.DeferredDataFrame.lt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.lt()</span></code></a></dt>
<dd>Compare DeferredDataFrames for strictly less than inequality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.ge" title="apache_beam.dataframe.frames.DeferredDataFrame.ge"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.ge()</span></code></a></dt>
<dd>Compare DeferredDataFrames for greater than inequality or equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.gt" title="apache_beam.dataframe.frames.DeferredDataFrame.gt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.gt()</span></code></a></dt>
<dd>Compare DeferredDataFrames for strictly greater than inequality elementwise.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.
<cite>NaN</cite> values are considered different (i.e. <cite>NaN</cite> != <cite>NaN</cite>).</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;cost&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">300</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> cost revenue</span>
<span class="go">A 250 100</span>
<span class="go">B 150 250</span>
<span class="go">C 100 300</span>
<span class="go">Comparison with a scalar, using either the operator or method:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">==</span> <span class="mi">100</span>
<span class="go"> cost revenue</span>
<span class="go">A False True</span>
<span class="go">B False False</span>
<span class="go">C True False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eq</span><span class="p">(</span><span class="mi">100</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A False True</span>
<span class="go">B False False</span>
<span class="go">C True False</span>
<span class="go">When `other` is a Series, the columns of a DataFrame are aligned</span>
<span class="go">with the index of `other` and broadcast:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">!=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;cost&quot;</span><span class="p">,</span> <span class="s2">&quot;revenue&quot;</span><span class="p">])</span>
<span class="go"> cost revenue</span>
<span class="go">A True True</span>
<span class="go">B True False</span>
<span class="go">C False True</span>
<span class="go">Use the method to control the broadcast axis:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">ne</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">100</span><span class="p">,</span> <span class="mi">300</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;A&quot;</span><span class="p">,</span> <span class="s2">&quot;D&quot;</span><span class="p">]),</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A True False</span>
<span class="go">B True True</span>
<span class="go">C True True</span>
<span class="go">D True True</span>
<span class="go">When comparing to an arbitrary sequence, the number of columns must</span>
<span class="go">match the number of elements in `other`:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">==</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">]</span>
<span class="go"> cost revenue</span>
<span class="go">A True True</span>
<span class="go">B False False</span>
<span class="go">C False False</span>
<span class="go">Use the method to control the axis:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eq</span><span class="p">([</span><span class="mi">250</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A True False</span>
<span class="go">B False True</span>
<span class="go">C True False</span>
<span class="go">Compare to a DataFrame of different shape.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">300</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">150</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;D&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> revenue</span>
<span class="go">A 300</span>
<span class="go">B 250</span>
<span class="go">C 100</span>
<span class="go">D 150</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">gt</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A False False</span>
<span class="go">B False False</span>
<span class="go">C False True</span>
<span class="go">D False False</span>
<span class="go">Compare to a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;cost&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">300</span><span class="p">,</span> <span class="mi">220</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">300</span><span class="p">,</span> <span class="mi">200</span><span class="p">,</span> <span class="mi">175</span><span class="p">,</span> <span class="mi">225</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> cost revenue</span>
<span class="go">Q1 A 250 100</span>
<span class="go"> B 150 250</span>
<span class="go"> C 100 300</span>
<span class="go">Q2 A 150 200</span>
<span class="go"> B 300 175</span>
<span class="go"> C 220 225</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">le</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">Q1 A True True</span>
<span class="go"> B True True</span>
<span class="go"> C True True</span>
<span class="go">Q2 A False True</span>
<span class="go"> B True False</span>
<span class="go"> C True False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.equals">
<code class="descname">equals</code><span class="sig-paren">(</span><em>other</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.equals" title="Permalink to this definition"></a></dt>
<dd><p>Test whether two objects contain the same elements.</p>
<p>This function allows two Series or DataFrames to be compared against
each other to see if they have the same shape and elements. NaNs in
the same location are considered equal.</p>
<p>The row/column index do not need to have the same type, as long
as the values are considered equal. Corresponding columns must be of
the same dtype.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>other</strong> (<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em> or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – The other DeferredSeries or DeferredDataFrame to be compared with the first.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">True if all elements are the same in both objects, False
otherwise.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)">bool</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.eq" title="apache_beam.dataframe.frames.DeferredSeries.eq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.eq()</span></code></a></dt>
<dd>Compare two DeferredSeries objects of the same length and return a DeferredSeries where each element is True if the element in each DeferredSeries is equal, False otherwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eq" title="apache_beam.dataframe.frames.DeferredDataFrame.eq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.eq()</span></code></a></dt>
<dd>Compare two DeferredDataFrame objects of the same shape and return a DeferredDataFrame where each element is True if the respective element in each DeferredDataFrame is equal, False otherwise.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">testing.assert_series_equal()</span></code></dt>
<dd>Raises an AssertionError if left and right are not equal. Provides an easy interface to ignore inequality in dtypes, indexes and precision among others.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">testing.assert_frame_equal()</span></code></dt>
<dd>Like assert_series_equal, but targets DeferredDataFrames.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">numpy.array_equal()</span></code></dt>
<dd>Return True if two arrays have the same shape and elements, False otherwise.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="mi">1</span><span class="p">:</span> <span class="p">[</span><span class="mi">10</span><span class="p">],</span> <span class="mi">2</span><span class="p">:</span> <span class="p">[</span><span class="mi">20</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> 1 2</span>
<span class="go">0 10 20</span>
<span class="go">DataFrames df and exactly_equal have the same types and values for</span>
<span class="go">their elements and column labels, which will return True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">exactly_equal</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="mi">1</span><span class="p">:</span> <span class="p">[</span><span class="mi">10</span><span class="p">],</span> <span class="mi">2</span><span class="p">:</span> <span class="p">[</span><span class="mi">20</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">exactly_equal</span>
<span class="go"> 1 2</span>
<span class="go">0 10 20</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">exactly_equal</span><span class="p">)</span>
<span class="go">True</span>
<span class="go">DataFrames df and different_column_type have the same element</span>
<span class="go">types and values, but have different types for the column labels,</span>
<span class="go">which will still return True.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">different_column_type</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="mf">1.0</span><span class="p">:</span> <span class="p">[</span><span class="mi">10</span><span class="p">],</span> <span class="mf">2.0</span><span class="p">:</span> <span class="p">[</span><span class="mi">20</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">different_column_type</span>
<span class="go"> 1.0 2.0</span>
<span class="go">0 10 20</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">different_column_type</span><span class="p">)</span>
<span class="go">True</span>
<span class="go">DataFrames df and different_data_type have different types for the</span>
<span class="go">same values for their elements, and will return False even though</span>
<span class="go">their column labels are the same values and types.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">different_data_type</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="mi">1</span><span class="p">:</span> <span class="p">[</span><span class="mf">10.0</span><span class="p">],</span> <span class="mi">2</span><span class="p">:</span> <span class="p">[</span><span class="mf">20.0</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">different_data_type</span>
<span class="go"> 1 2</span>
<span class="go">0 10.0 20.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">equals</span><span class="p">(</span><span class="n">different_data_type</span><span class="p">)</span>
<span class="go">False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.ewm">
<code class="descname">ewm</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.ewm" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.ewm.html#pandas.Series.ewm" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.ewm()</span></code></a> is not yet supported in the Beam DataFrame API because implementing it would require integrating with Beam event-time semantics.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-event-time-semantics">https://s.apache.org/dataframe-event-time-semantics</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.expanding">
<code class="descname">expanding</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.expanding" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.expanding.html#pandas.Series.expanding" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.expanding()</span></code></a> is not yet supported in the Beam DataFrame API because implementing it would require integrating with Beam event-time semantics.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-event-time-semantics">https://s.apache.org/dataframe-event-time-semantics</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.ffill">
<code class="descname">ffill</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.ffill" title="Permalink to this definition"></a></dt>
<dd><p>ffill is only supported for axis="columns". axis="index" is order-sensitive.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.fillna">
<code class="descname">fillna</code><span class="sig-paren">(</span><em>value</em>, <em>method</em>, <em>axis</em>, <em>limit</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.fillna" title="Permalink to this definition"></a></dt>
<dd><p>Fill NA/NaN values using the specified method.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>value</strong> (<em>scalar</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Value to use to fill holes (e.g. 0), alternately a
dict/DeferredSeries/DeferredDataFrame of values specifying which value to use for
each index (for a DeferredSeries) or column (for a DeferredDataFrame). Values not
in the dict/DeferredSeries/DeferredDataFrame will not be filled. This value cannot
be a list.</li>
<li><strong>method</strong> (<em>{'backfill'</em><em>, </em><em>'bfill'</em><em>, </em><em>'pad'</em><em>, </em><em>'ffill'</em><em>, </em><em>None}</em><em>, </em><em>default None</em>) – Method to use for filling holes in reindexed DeferredSeries.
pad / ffill: propagate the last valid observation forward to the next valid one;
backfill / bfill: use the next valid observation to fill the gap.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Axis along which to fill missing values.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, fill in-place. Note: this will modify any
other views on this object (e.g., a no-copy slice for a column in a
DeferredDataFrame).</li>
<li><strong>limit</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default None</em>) – If method is specified, this is the maximum number of consecutive
NaN values to forward/backward fill. In other words, if there is
a gap with more than this number of consecutive NaNs, it will only
be partially filled. If method is not specified, this is the
maximum number of entries along the entire axis where NaNs will be
filled. Must be greater than 0 if not None.</li>
<li><strong>downcast</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>default is None</em>) – A dict of item-&gt;dtype of what to downcast if possible,
or the string ‘infer’ which will try to downcast to an appropriate
equal type (e.g. float64 to int64 if possible).</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Object with missing values filled or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>When <code class="docutils literal notranslate"><span class="pre">axis=&quot;index&quot;</span></code>, both <code class="docutils literal notranslate"><span class="pre">method</span></code> and <code class="docutils literal notranslate"><span class="pre">limit</span></code> must be <code class="docutils literal notranslate"><span class="pre">None</span></code>;
otherwise this operation is order-sensitive.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.interpolate" title="apache_beam.dataframe.frames.DeferredDataFrame.interpolate"><code class="xref py py-meth docutils literal notranslate"><span class="pre">interpolate()</span></code></a></dt>
<dd>Fill NaN values using interpolation.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.reindex" title="apache_beam.dataframe.frames.DeferredDataFrame.reindex"><code class="xref py py-meth docutils literal notranslate"><span class="pre">reindex()</span></code></a></dt>
<dd>Conform object to new index.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.asfreq" title="apache_beam.dataframe.frames.DeferredDataFrame.asfreq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">asfreq()</span></code></a></dt>
<dd>Convert time series to specified frequency.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">5</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="mi">4</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="s2">&quot;ABCD&quot;</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B C D</span>
<span class="go">0 NaN 2.0 NaN 0</span>
<span class="go">1 3.0 4.0 NaN 1</span>
<span class="go">2 NaN NaN NaN 5</span>
<span class="go">3 NaN 3.0 NaN 4</span>
<span class="go">Replace all NaN elements with 0s.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> A B C D</span>
<span class="go">0 0.0 2.0 0.0 0</span>
<span class="go">1 3.0 4.0 0.0 1</span>
<span class="go">2 0.0 0.0 0.0 5</span>
<span class="go">3 0.0 3.0 0.0 4</span>
<span class="go">We can also propagate non-null values forward or backward.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="s2">&quot;ffill&quot;</span><span class="p">)</span>
<span class="go"> A B C D</span>
<span class="go">0 NaN 2.0 NaN 0</span>
<span class="go">1 3.0 4.0 NaN 1</span>
<span class="go">2 3.0 4.0 NaN 5</span>
<span class="go">3 3.0 3.0 NaN 4</span>
<span class="go">Replace all NaN elements in column &#39;A&#39;, &#39;B&#39;, &#39;C&#39;, and &#39;D&#39;, with 0, 1,</span>
<span class="go">2, and 3 respectively.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">values</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;A&quot;</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;B&quot;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;C&quot;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s2">&quot;D&quot;</span><span class="p">:</span> <span class="mi">3</span><span class="p">}</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">values</span><span class="p">)</span>
<span class="go"> A B C D</span>
<span class="go">0 0.0 2.0 2.0 0</span>
<span class="go">1 3.0 4.0 2.0 1</span>
<span class="go">2 0.0 1.0 2.0 5</span>
<span class="go">3 0.0 3.0 2.0 4</span>
<span class="go">Only replace the first NaN element.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">value</span><span class="o">=</span><span class="n">values</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> A B C D</span>
<span class="go">0 0.0 2.0 2.0 0</span>
<span class="go">1 3.0 4.0 NaN 1</span>
<span class="go">2 NaN 1.0 NaN 5</span>
<span class="go">3 NaN 3.0 NaN 4</span>
<span class="go">When filling using a DataFrame, replacement happens along</span>
<span class="go">the same column names and same indices</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">)),</span> <span class="n">columns</span><span class="o">=</span><span class="nb">list</span><span class="p">(</span><span class="s2">&quot;ABCE&quot;</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">fillna</span><span class="p">(</span><span class="n">df2</span><span class="p">)</span>
<span class="go"> A B C D</span>
<span class="go">0 0.0 2.0 0.0 0</span>
<span class="go">1 3.0 4.0 0.0 1</span>
<span class="go">2 0.0 0.0 0.0 5</span>
<span class="go">3 0.0 3.0 0.0 4</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.filter">
<code class="descname">filter</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.filter" title="Permalink to this definition"></a></dt>
<dd><p>Subset the dataframe rows or columns according to the specified index labels.</p>
<p>Note that this routine does not filter a dataframe on its
contents. The filter is applied to the labels of the index.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>items</strong> (<em>list-like</em>) – Keep labels from axis which are in items.</li>
<li><strong>like</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – Keep labels from axis for which “like in label == True”.</li>
<li><strong>regex</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> (</em><em>regular expression</em><em>)</em>) – Keep labels from axis for which re.search(regex, label) == True.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>‘index’</em><em>, </em><em>1</em><em> or </em><em>‘columns’</em><em>, </em><em>None}</em><em>, </em><em>default None</em>) – The axis to filter on, expressed either as an index (int)
or axis name (str). By default this is the info axis,
‘index’ for DeferredSeries, ‘columns’ for DeferredDataFrame.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">same type as input object</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.loc" title="apache_beam.dataframe.frames.DeferredDataFrame.loc"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.loc()</span></code></a></dt>
<dd>Access a group of rows and columns by label(s) or a boolean array.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>The <code class="docutils literal notranslate"><span class="pre">items</span></code>, <code class="docutils literal notranslate"><span class="pre">like</span></code>, and <code class="docutils literal notranslate"><span class="pre">regex</span></code> parameters are
enforced to be mutually exclusive.</p>
<p><code class="docutils literal notranslate"><span class="pre">axis</span></code> defaults to the info axis that is used when indexing
with <code class="docutils literal notranslate"><span class="pre">[]</span></code>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">(([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">])),</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;mouse&#39;</span><span class="p">,</span> <span class="s1">&#39;rabbit&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;one&#39;</span><span class="p">,</span> <span class="s1">&#39;two&#39;</span><span class="p">,</span> <span class="s1">&#39;three&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> one two three</span>
<span class="go">mouse 1 2 3</span>
<span class="go">rabbit 4 5 6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1"># select columns by name</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">items</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;one&#39;</span><span class="p">,</span> <span class="s1">&#39;three&#39;</span><span class="p">])</span>
<span class="go"> one three</span>
<span class="go">mouse 1 3</span>
<span class="go">rabbit 4 6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1"># select columns by regular expression</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">regex</span><span class="o">=</span><span class="s1">&#39;e$&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> one three</span>
<span class="go">mouse 1 3</span>
<span class="go">rabbit 4 6</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1"># select rows containing &#39;bbi&#39;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">like</span><span class="o">=</span><span class="s1">&#39;bbi&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> one two three</span>
<span class="go">rabbit 4 5 6</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.first">
<code class="descname">first</code><span class="sig-paren">(</span><em>offset</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.first" title="Permalink to this definition"></a></dt>
<dd><p>Select initial periods of time series data based on a date offset.</p>
<p>When having a DataFrame with dates as index, this function can
select the first few rows based on a date offset.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>offset</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>DateOffset</em><em> or </em><em>dateutil.relativedelta</em>) – The offset length of the data that will be selected. For instance,
‘1M’ will display all the rows having their index within the first month.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A subset of the caller.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> – If the index is not a <code class="xref py py-class docutils literal notranslate"><span class="pre">DatetimeIndex</span></code></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.last" title="apache_beam.dataframe.frames.DeferredDataFrame.last"><code class="xref py py-meth docutils literal notranslate"><span class="pre">last()</span></code></a></dt>
<dd>Select final periods of time series based on a date offset.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.at_time" title="apache_beam.dataframe.frames.DeferredDataFrame.at_time"><code class="xref py py-meth docutils literal notranslate"><span class="pre">at_time()</span></code></a></dt>
<dd>Select values at a particular time of the day.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.between_time" title="apache_beam.dataframe.frames.DeferredDataFrame.between_time"><code class="xref py py-meth docutils literal notranslate"><span class="pre">between_time()</span></code></a></dt>
<dd>Select values between particular times of the day.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">i</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s1">&#39;2018-04-09&#39;</span><span class="p">,</span> <span class="n">periods</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;2D&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span> <span class="n">index</span><span class="o">=</span><span class="n">i</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span>
<span class="go"> A</span>
<span class="go">2018-04-09 1</span>
<span class="go">2018-04-11 2</span>
<span class="go">2018-04-13 3</span>
<span class="go">2018-04-15 4</span>
<span class="go">Get the rows for the first 3 days:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span><span class="o">.</span><span class="n">first</span><span class="p">(</span><span class="s1">&#39;3D&#39;</span><span class="p">)</span>
<span class="go"> A</span>
<span class="go">2018-04-09 1</span>
<span class="go">2018-04-11 2</span>
<span class="go">Notice the data for the first 3 calendar days were returned, not the first</span>
<span class="go">3 days observed in the dataset, and therefore data for 2018-04-13 was</span>
<span class="go">not returned.</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.flags">
<code class="descname">flags</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.flags" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.flags()</span></code> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘flags’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.floordiv">
<code class="descname">floordiv</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="Permalink to this definition"></a></dt>
<dd><p>Get Integer division of dataframe and other, element-wise (binary operator <cite>floordiv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">dataframe</span> <span class="pre">//</span> <span class="pre">other</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. With reverse version, <cite>rfloordiv</cite>.</p>
<p>Among flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) to
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with operator version, which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.ge">
<code class="descname">ge</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.ge" title="Permalink to this definition"></a></dt>
<dd><p>Compare dataframe and other for greater than or equal to, element-wise (binary operator <cite>ge</cite>).</p>
<p>This is one of the flexible wrappers (<cite>eq</cite>, <cite>ne</cite>, <cite>le</cite>, <cite>lt</cite>, <cite>ge</cite>, <cite>gt</cite>) around the comparison
operators.</p>
<p>Equivalent to <cite>==</cite>, <cite>!=</cite>, <cite>&lt;=</cite>, <cite>&lt;</cite>, <cite>&gt;=</cite>, <cite>&gt;</cite> with support to choose axis
(rows or columns) and level for comparison.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 'columns'</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’).</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the passed
MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the comparison.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">DeferredDataFrame of bool</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eq" title="apache_beam.dataframe.frames.DeferredDataFrame.eq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.eq()</span></code></a></dt>
<dd>Compare DeferredDataFrames for equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.ne" title="apache_beam.dataframe.frames.DeferredDataFrame.ne"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.ne()</span></code></a></dt>
<dd>Compare DeferredDataFrames for inequality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.le" title="apache_beam.dataframe.frames.DeferredDataFrame.le"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.le()</span></code></a></dt>
<dd>Compare DeferredDataFrames for less than inequality or equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.lt" title="apache_beam.dataframe.frames.DeferredDataFrame.lt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.lt()</span></code></a></dt>
<dd>Compare DeferredDataFrames for strictly less than inequality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.ge" title="apache_beam.dataframe.frames.DeferredDataFrame.ge"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.ge()</span></code></a></dt>
<dd>Compare DeferredDataFrames for greater than inequality or equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.gt" title="apache_beam.dataframe.frames.DeferredDataFrame.gt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.gt()</span></code></a></dt>
<dd>Compare DeferredDataFrames for strictly greater than inequality elementwise.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.
<cite>NaN</cite> values are considered different (i.e. <cite>NaN</cite> != <cite>NaN</cite>).</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;cost&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">300</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> cost revenue</span>
<span class="go">A 250 100</span>
<span class="go">B 150 250</span>
<span class="go">C 100 300</span>
<span class="go">Comparison with a scalar, using either the operator or method:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">==</span> <span class="mi">100</span>
<span class="go"> cost revenue</span>
<span class="go">A False True</span>
<span class="go">B False False</span>
<span class="go">C True False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eq</span><span class="p">(</span><span class="mi">100</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A False True</span>
<span class="go">B False False</span>
<span class="go">C True False</span>
<span class="go">When `other` is a Series, the columns of a DataFrame are aligned</span>
<span class="go">with the index of `other` and broadcast:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">!=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;cost&quot;</span><span class="p">,</span> <span class="s2">&quot;revenue&quot;</span><span class="p">])</span>
<span class="go"> cost revenue</span>
<span class="go">A True True</span>
<span class="go">B True False</span>
<span class="go">C False True</span>
<span class="go">Use the method to control the broadcast axis:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">ne</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">100</span><span class="p">,</span> <span class="mi">300</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;A&quot;</span><span class="p">,</span> <span class="s2">&quot;D&quot;</span><span class="p">]),</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A True False</span>
<span class="go">B True True</span>
<span class="go">C True True</span>
<span class="go">D True True</span>
<span class="go">When comparing to an arbitrary sequence, the number of columns must</span>
<span class="go">match the number of elements in `other`:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">==</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">]</span>
<span class="go"> cost revenue</span>
<span class="go">A True True</span>
<span class="go">B False False</span>
<span class="go">C False False</span>
<span class="go">Use the method to control the axis:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eq</span><span class="p">([</span><span class="mi">250</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A True False</span>
<span class="go">B False True</span>
<span class="go">C True False</span>
<span class="go">Compare to a DataFrame of different shape.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">300</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">150</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;D&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> revenue</span>
<span class="go">A 300</span>
<span class="go">B 250</span>
<span class="go">C 100</span>
<span class="go">D 150</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">gt</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A False False</span>
<span class="go">B False False</span>
<span class="go">C False True</span>
<span class="go">D False False</span>
<span class="go">Compare to a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;cost&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">300</span><span class="p">,</span> <span class="mi">220</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">300</span><span class="p">,</span> <span class="mi">200</span><span class="p">,</span> <span class="mi">175</span><span class="p">,</span> <span class="mi">225</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> cost revenue</span>
<span class="go">Q1 A 250 100</span>
<span class="go"> B 150 250</span>
<span class="go"> C 100 300</span>
<span class="go">Q2 A 150 200</span>
<span class="go"> B 300 175</span>
<span class="go"> C 220 225</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">le</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">Q1 A True True</span>
<span class="go"> B True True</span>
<span class="go"> C True True</span>
<span class="go">Q2 A False True</span>
<span class="go"> B True False</span>
<span class="go"> C True False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.groupby">
<code class="descname">groupby</code><span class="sig-paren">(</span><em>by</em>, <em>level</em>, <em>axis</em>, <em>as_index</em>, <em>group_keys</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.groupby" title="Permalink to this definition"></a></dt>
<dd><p>Group DataFrame using a mapper or by a Series of columns.</p>
<p>A groupby operation involves some combination of splitting the
object, applying a function, and combining the results. This can be
used to group large amounts of data and compute operations on these
groups.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>by</strong> (<em>mapping</em><em>, </em><em>function</em><em>, </em><em>label</em><em>, or </em><em>list of labels</em>) – Used to determine the groups for the groupby.
If <code class="docutils literal notranslate"><span class="pre">by</span></code> is a function, it’s called on each value of the object’s
index. If a dict or DeferredSeries is passed, the DeferredSeries or dict VALUES
will be used to determine the groups (the DeferredSeries’ values are first
aligned; see <code class="docutils literal notranslate"><span class="pre">.align()</span></code> method). If an ndarray is passed, the
values are used as-is to determine the groups. A label or list of
labels may be passed to group by the columns in <code class="docutils literal notranslate"><span class="pre">self</span></code>. Notice
that a tuple is interpreted as a (single) key.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – Split along rows (0) or columns (1).</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>level name</em><em>, or </em><em>sequence of such</em><em>, </em><em>default None</em>) – If the axis is a MultiIndex (hierarchical), group by a particular
level or levels.</li>
<li><strong>as_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – For aggregated output, return object with group labels as the
index. Only relevant for DeferredDataFrame input. as_index=False is
effectively “SQL-style” grouped output.</li>
<li><strong>sort</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Sort group keys. Get better performance by turning this off.
Note this does not influence the order of observations within each
group. Groupby preserves the order of rows within each group.</li>
<li><strong>group_keys</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – When calling apply, add group keys to index to identify pieces.</li>
<li><strong>squeeze</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>Reduce the dimensionality of the return type if possible,
otherwise return a consistent type.</p>
<div class="deprecated">
<p><span class="versionmodified">Deprecated since version 1.1.0.</span></p>
</div>
</li>
<li><strong>observed</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – This only applies if any of the groupers are Categoricals.
If True: only show observed values for categorical groupers.
If False: show all values for categorical groupers.</li>
<li><strong>dropna</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – <p>If True, and if group keys contain NA values, NA values together
with row/column will be dropped.
If False, NA values will also be treated as the key in groups.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Returns a groupby object that contains information about the groups.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">DeferredDataFrameGroupBy</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">as_index</span></code> and <code class="docutils literal notranslate"><span class="pre">group_keys</span></code> must both be <code class="docutils literal notranslate"><span class="pre">True</span></code>.</p>
<p>Aggregations grouping by a categorical column with <code class="docutils literal notranslate"><span class="pre">observed=False</span></code> set
are not currently parallelizable
(<a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-11190">BEAM-11190</a>).</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.resample" title="apache_beam.dataframe.frames.DeferredDataFrame.resample"><code class="xref py py-meth docutils literal notranslate"><span class="pre">resample()</span></code></a></dt>
<dd>Convenience method for frequency conversion and resampling of time series.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>See the <a class="reference external" href="https://pandas.pydata.org/pandas-docs/stable/groupby.html">user guide</a> for more.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;Animal&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;Falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;Falcon&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;Parrot&#39;</span><span class="p">,</span> <span class="s1">&#39;Parrot&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;Max Speed&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mf">380.</span><span class="p">,</span> <span class="mf">370.</span><span class="p">,</span> <span class="mf">24.</span><span class="p">,</span> <span class="mf">26.</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> Animal Max Speed</span>
<span class="go">0 Falcon 380.0</span>
<span class="go">1 Falcon 370.0</span>
<span class="go">2 Parrot 24.0</span>
<span class="go">3 Parrot 26.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">([</span><span class="s1">&#39;Animal&#39;</span><span class="p">])</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="go"> Max Speed</span>
<span class="go">Animal</span>
<span class="go">Falcon 375.0</span>
<span class="go">Parrot 25.0</span>
<span class="go">**Hierarchical Indexes**</span>
<span class="go">We can groupby different levels of a hierarchical index</span>
<span class="go">using the `level` parameter:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">arrays</span> <span class="o">=</span> <span class="p">[[</span><span class="s1">&#39;Falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;Falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;Parrot&#39;</span><span class="p">,</span> <span class="s1">&#39;Parrot&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;Captive&#39;</span><span class="p">,</span> <span class="s1">&#39;Wild&#39;</span><span class="p">,</span> <span class="s1">&#39;Captive&#39;</span><span class="p">,</span> <span class="s1">&#39;Wild&#39;</span><span class="p">]]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">index</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">(</span><span class="n">arrays</span><span class="p">,</span> <span class="n">names</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;Animal&#39;</span><span class="p">,</span> <span class="s1">&#39;Type&#39;</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;Max Speed&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mf">390.</span><span class="p">,</span> <span class="mf">350.</span><span class="p">,</span> <span class="mf">30.</span><span class="p">,</span> <span class="mf">20.</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> Max Speed</span>
<span class="go">Animal Type</span>
<span class="go">Falcon Captive 390.0</span>
<span class="go"> Wild 350.0</span>
<span class="go">Parrot Captive 30.0</span>
<span class="go"> Wild 20.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="go"> Max Speed</span>
<span class="go">Animal</span>
<span class="go">Falcon 370.0</span>
<span class="go">Parrot 25.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="s2">&quot;Type&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span>
<span class="go"> Max Speed</span>
<span class="go">Type</span>
<span class="go">Captive 210.0</span>
<span class="go">Wild 185.0</span>
<span class="go">We can also choose to include NA in group keys or not by setting</span>
<span class="go">`dropna` parameter, the default setting is `True`:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">l</span> <span class="o">=</span> <span class="p">[[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="kc">None</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">]]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="s2">&quot;c&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;b&quot;</span><span class="p">])</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="go"> a c</span>
<span class="go">b</span>
<span class="go">1.0 2 3</span>
<span class="go">2.0 2 5</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;b&quot;</span><span class="p">],</span> <span class="n">dropna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="go"> a c</span>
<span class="go">b</span>
<span class="go">1.0 2 3</span>
<span class="go">2.0 2 5</span>
<span class="go">NaN 1 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">l</span> <span class="o">=</span> <span class="p">[[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="mi">12</span><span class="p">,</span> <span class="mi">12</span><span class="p">],</span> <span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="mf">12.3</span><span class="p">,</span> <span class="mf">33.</span><span class="p">],</span> <span class="p">[</span><span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="mf">12.3</span><span class="p">,</span> <span class="mi">123</span><span class="p">],</span> <span class="p">[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">l</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">,</span> <span class="s2">&quot;c&quot;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="s2">&quot;a&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="go"> b c</span>
<span class="go">a</span>
<span class="go">a 13.0 13.0</span>
<span class="go">b 12.3 123.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="n">dropna</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">sum</span><span class="p">()</span>
<span class="go"> b c</span>
<span class="go">a</span>
<span class="go">a 13.0 13.0</span>
<span class="go">b 12.3 123.0</span>
<span class="go">NaN 12.3 33.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.gt">
<code class="descname">gt</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.gt" title="Permalink to this definition"></a></dt>
<dd><p>Return whether dataframe is greater than other, element-wise (binary operator <cite>gt</cite>).</p>
<p>Among flexible wrappers (<cite>eq</cite>, <cite>ne</cite>, <cite>le</cite>, <cite>lt</cite>, <cite>ge</cite>, <cite>gt</cite>) to comparison
operators.</p>
<p>Equivalent to <cite>==</cite>, <cite>!=</cite>, <cite>&lt;=</cite>, <cite>&lt;</cite>, <cite>&gt;=</cite>, <cite>&gt;</cite> with support to choose axis
(rows or columns) and level for comparison.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 'columns'</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’).</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the passed
MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the comparison.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">DeferredDataFrame of bool</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eq" title="apache_beam.dataframe.frames.DeferredDataFrame.eq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.eq()</span></code></a></dt>
<dd>Compare DeferredDataFrames for equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.ne" title="apache_beam.dataframe.frames.DeferredDataFrame.ne"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.ne()</span></code></a></dt>
<dd>Compare DeferredDataFrames for inequality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.le" title="apache_beam.dataframe.frames.DeferredDataFrame.le"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.le()</span></code></a></dt>
<dd>Compare DeferredDataFrames for less than inequality or equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.lt" title="apache_beam.dataframe.frames.DeferredDataFrame.lt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.lt()</span></code></a></dt>
<dd>Compare DeferredDataFrames for strictly less than inequality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.ge" title="apache_beam.dataframe.frames.DeferredDataFrame.ge"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.ge()</span></code></a></dt>
<dd>Compare DeferredDataFrames for greater than inequality or equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.gt" title="apache_beam.dataframe.frames.DeferredDataFrame.gt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.gt()</span></code></a></dt>
<dd>Compare DeferredDataFrames for strictly greater than inequality elementwise.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.
<cite>NaN</cite> values are considered different (i.e. <cite>NaN</cite> != <cite>NaN</cite>).</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;cost&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">300</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> cost revenue</span>
<span class="go">A 250 100</span>
<span class="go">B 150 250</span>
<span class="go">C 100 300</span>
<span class="go">Comparison with a scalar, using either the operator or method:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">==</span> <span class="mi">100</span>
<span class="go"> cost revenue</span>
<span class="go">A False True</span>
<span class="go">B False False</span>
<span class="go">C True False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eq</span><span class="p">(</span><span class="mi">100</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A False True</span>
<span class="go">B False False</span>
<span class="go">C True False</span>
<span class="go">When `other` is a Series, the columns of a DataFrame are aligned</span>
<span class="go">with the index of `other` and broadcast:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">!=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;cost&quot;</span><span class="p">,</span> <span class="s2">&quot;revenue&quot;</span><span class="p">])</span>
<span class="go"> cost revenue</span>
<span class="go">A True True</span>
<span class="go">B True False</span>
<span class="go">C False True</span>
<span class="go">Use the method to control the broadcast axis:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">ne</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">100</span><span class="p">,</span> <span class="mi">300</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;A&quot;</span><span class="p">,</span> <span class="s2">&quot;D&quot;</span><span class="p">]),</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A True False</span>
<span class="go">B True True</span>
<span class="go">C True True</span>
<span class="go">D True True</span>
<span class="go">When comparing to an arbitrary sequence, the number of columns must</span>
<span class="go">match the number of elements in `other`:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">==</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">]</span>
<span class="go"> cost revenue</span>
<span class="go">A True True</span>
<span class="go">B False False</span>
<span class="go">C False False</span>
<span class="go">Use the method to control the axis:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eq</span><span class="p">([</span><span class="mi">250</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A True False</span>
<span class="go">B False True</span>
<span class="go">C True False</span>
<span class="go">Compare to a DataFrame of different shape.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">300</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">150</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;D&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> revenue</span>
<span class="go">A 300</span>
<span class="go">B 250</span>
<span class="go">C 100</span>
<span class="go">D 150</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">gt</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A False False</span>
<span class="go">B False False</span>
<span class="go">C False True</span>
<span class="go">D False False</span>
<span class="go">Compare to a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;cost&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">300</span><span class="p">,</span> <span class="mi">220</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">300</span><span class="p">,</span> <span class="mi">200</span><span class="p">,</span> <span class="mi">175</span><span class="p">,</span> <span class="mi">225</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> cost revenue</span>
<span class="go">Q1 A 250 100</span>
<span class="go"> B 150 250</span>
<span class="go"> C 100 300</span>
<span class="go">Q2 A 150 200</span>
<span class="go"> B 300 175</span>
<span class="go"> C 220 225</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">le</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">Q1 A True True</span>
<span class="go"> B True True</span>
<span class="go"> C True True</span>
<span class="go">Q2 A False True</span>
<span class="go"> B True False</span>
<span class="go"> C True False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.hist">
<code class="descname">hist</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.hist" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.hist.html#pandas.DataFrame.hist" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.hist()</span></code></a> is not yet supported in the Beam DataFrame API because it is a plotting tool.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-plotting-tools">https://s.apache.org/dataframe-plotting-tools</a>.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.iloc">
<code class="descname">iloc</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.iloc" title="Permalink to this definition"></a></dt>
<dd><p>Purely integer-location based indexing for selection by position.</p>
<p><code class="docutils literal notranslate"><span class="pre">.iloc[]</span></code> is primarily integer position based (from <code class="docutils literal notranslate"><span class="pre">0</span></code> to
<code class="docutils literal notranslate"><span class="pre">length-1</span></code> of the axis), but may also be used with a boolean
array.</p>
<p>Allowed inputs are:</p>
<ul class="simple">
<li>An integer, e.g. <code class="docutils literal notranslate"><span class="pre">5</span></code>.</li>
<li>A list or array of integers, e.g. <code class="docutils literal notranslate"><span class="pre">[4,</span> <span class="pre">3,</span> <span class="pre">0]</span></code>.</li>
<li>A slice object with ints, e.g. <code class="docutils literal notranslate"><span class="pre">1:7</span></code>.</li>
<li>A boolean array.</li>
<li>A <code class="docutils literal notranslate"><span class="pre">callable</span></code> function with one argument (the calling Series or
DataFrame) and that returns valid output for indexing (one of the above).
This is useful in method chains, when you don’t have a reference to the
calling object, but would like to base your selection on some value.</li>
</ul>
<p><code class="docutils literal notranslate"><span class="pre">.iloc</span></code> will raise <code class="docutils literal notranslate"><span class="pre">IndexError</span></code> if a requested indexer is
out-of-bounds, except <em>slice</em> indexers which allow out-of-bounds
indexing (this conforms with python/numpy <em>slice</em> semantics).</p>
<p>See more at <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/indexing.html#indexing-integer" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">Selection by Position</span></a>.</p>
<p class="rubric">Differences from pandas</p>
<p>Position-based indexing with <cite>iloc</cite> is order-sensitive in almost every
case. Beam DataFrame users should prefer label-based indexing with <cite>loc</cite>.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.iat" title="apache_beam.dataframe.frames.DeferredDataFrame.iat"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.iat</span></code></a></dt>
<dd>Fast integer location scalar accessor.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.loc" title="apache_beam.dataframe.frames.DeferredDataFrame.loc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.loc</span></code></a></dt>
<dd>Purely label-location based indexer for selection by label.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.iloc" title="apache_beam.dataframe.frames.DeferredSeries.iloc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredSeries.iloc</span></code></a></dt>
<dd>Purely integer-location based indexing for selection by position.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">mydict</span> <span class="o">=</span> <span class="p">[{</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">:</span> <span class="mi">4</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="mi">100</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="mi">200</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="mi">300</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">:</span> <span class="mi">400</span><span class="p">},</span>
<span class="gp">... </span> <span class="p">{</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="mi">1000</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="mi">2000</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="mi">3000</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">:</span> <span class="mi">4000</span> <span class="p">}]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">mydict</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="go">1 100 200 300 400</span>
<span class="go">2 1000 2000 3000 4000</span>
<span class="go">**Indexing just the rows**</span>
<span class="go">With a scalar integer.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="nb">type</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<span class="go">&lt;class &#39;pandas.core.series.Series&#39;&gt;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="go">a 1</span>
<span class="go">b 2</span>
<span class="go">c 3</span>
<span class="go">d 4</span>
<span class="go">Name: 0, dtype: int64</span>
<span class="go">With a list of integers.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[[</span><span class="mi">0</span><span class="p">]]</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="nb">type</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[[</span><span class="mi">0</span><span class="p">]])</span>
<span class="go">&lt;class &#39;pandas.core.frame.DataFrame&#39;&gt;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">]]</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="go">1 100 200 300 400</span>
<span class="go">With a `slice` object.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:</span><span class="mi">3</span><span class="p">]</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="go">1 100 200 300 400</span>
<span class="go">2 1000 2000 3000 4000</span>
<span class="go">With a boolean mask the same length as the index.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">]]</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="go">2 1000 2000 3000 4000</span>
<span class="go">With a callable, useful in method chains. The `x` passed</span>
<span class="go">to the ``lambda`` is the DataFrame being sliced. This selects</span>
<span class="go">the rows whose index label is even.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">index</span> <span class="o">%</span> <span class="mi">2</span> <span class="o">==</span> <span class="mi">0</span><span class="p">]</span>
<span class="go"> a b c d</span>
<span class="go">0 1 2 3 4</span>
<span class="go">2 1000 2000 3000 4000</span>
<span class="go">**Indexing both axes**</span>
<span class="go">You can mix the indexer types for the index and columns. Use ``:`` to</span>
<span class="go">select the entire axis.</span>
<span class="go">With scalar integers.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span>
<span class="go">2</span>
<span class="go">With lists of integers.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">3</span><span class="p">]]</span>
<span class="go"> b d</span>
<span class="go">0 2 4</span>
<span class="go">2 2000 4000</span>
<span class="go">With `slice` objects.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="mi">3</span><span class="p">,</span> <span class="mi">0</span><span class="p">:</span><span class="mi">3</span><span class="p">]</span>
<span class="go"> a b c</span>
<span class="go">1 100 200 300</span>
<span class="go">2 1000 2000 3000</span>
<span class="go">With a boolean array whose length matches the columns.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="p">[</span><span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">]]</span>
<span class="go"> a c</span>
<span class="go">0 1 3</span>
<span class="go">1 100 300</span>
<span class="go">2 1000 3000</span>
<span class="go">With a callable function that expects the Series or DataFrame.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">iloc</span><span class="p">[:,</span> <span class="k">lambda</span> <span class="n">df</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">]]</span>
<span class="go"> a c</span>
<span class="go">0 1 3</span>
<span class="go">1 100 300</span>
<span class="go">2 1000 3000</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.index">
<code class="descname">index</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.index" title="Permalink to this definition"></a></dt>
<dd><p>The index (row labels) of the DataFrame.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.infer_object">
<code class="descname">infer_object</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.infer_object" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.infer_objects.html#pandas.Series.infer_objects" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.infer_objects()</span></code></a> is not yet supported in the Beam DataFrame API because the columns in the output DataFrame depend on the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-columns">https://s.apache.org/dataframe-non-deferred-columns</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.infer_objects">
<code class="descname">infer_objects</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.infer_objects" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.infer_objects.html#pandas.DataFrame.infer_objects" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.infer_objects()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘infer_objects’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.isin">
<code class="descname">isin</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.isin" title="Permalink to this definition"></a></dt>
<dd><p>Whether each element in the DataFrame is contained in values.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>values</strong> (<em>iterable</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a>) – The result will only be true at a location if all the
labels match. If <cite>values</cite> is a DeferredSeries, that’s the index. If
<cite>values</cite> is a dict, the keys must be the column names,
which must match. If <cite>values</cite> is a DeferredDataFrame,
then both the index and column labels must match.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">DeferredDataFrame of booleans showing whether each element in the DeferredDataFrame
is contained in values.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eq" title="apache_beam.dataframe.frames.DeferredDataFrame.eq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.eq()</span></code></a></dt>
<dd>Equality test for DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.isin" title="apache_beam.dataframe.frames.DeferredSeries.isin"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.isin()</span></code></a></dt>
<dd>Equivalent method on DeferredSeries.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.str.contains()</span></code></dt>
<dd>Test if pattern or regex is contained within a string of a DeferredSeries or Index.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;num_legs&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="s1">&#39;num_wings&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> num_legs num_wings</span>
<span class="go">falcon 2 2</span>
<span class="go">dog 4 0</span>
<span class="go">When ``values`` is a list check whether every value in the DataFrame</span>
<span class="go">is present in the list (which animals have 0 or 2 legs or wings)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">isin</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span>
<span class="go"> num_legs num_wings</span>
<span class="go">falcon True True</span>
<span class="go">dog False True</span>
<span class="go">When ``values`` is a dict, we can pass values to check for each</span>
<span class="go">column separately:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">isin</span><span class="p">({</span><span class="s1">&#39;num_wings&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">]})</span>
<span class="go"> num_legs num_wings</span>
<span class="go">falcon False False</span>
<span class="go">dog False True</span>
<span class="go">When ``values`` is a Series or DataFrame the index and column must</span>
<span class="go">match. Note that &#39;falcon&#39; does not match based on the number of legs</span>
<span class="go">in ``other``.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;num_legs&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">8</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;num_wings&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;spider&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">isin</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<span class="go"> num_legs num_wings</span>
<span class="go">falcon True True</span>
<span class="go">dog False False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.last">
<code class="descname">last</code><span class="sig-paren">(</span><em>offset</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.last" title="Permalink to this definition"></a></dt>
<dd><p>Select final periods of time series data based on a date offset.</p>
<p>For a DataFrame with a sorted DatetimeIndex, this function
selects the last few rows based on a date offset.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><strong>offset</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>DateOffset</em><em>, </em><em>dateutil.relativedelta</em>) – The offset length of the data that will be selected. For instance,
‘3D’ will display all the rows having their index within the last 3 days.</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body">A subset of the caller.</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> – If the index is not a <code class="xref py py-class docutils literal notranslate"><span class="pre">DatetimeIndex</span></code></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.first" title="apache_beam.dataframe.frames.DeferredDataFrame.first"><code class="xref py py-meth docutils literal notranslate"><span class="pre">first()</span></code></a></dt>
<dd>Select initial periods of time series based on a date offset.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.at_time" title="apache_beam.dataframe.frames.DeferredDataFrame.at_time"><code class="xref py py-meth docutils literal notranslate"><span class="pre">at_time()</span></code></a></dt>
<dd>Select values at a particular time of the day.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.between_time" title="apache_beam.dataframe.frames.DeferredDataFrame.between_time"><code class="xref py py-meth docutils literal notranslate"><span class="pre">between_time()</span></code></a></dt>
<dd>Select values between particular times of the day.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">i</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s1">&#39;2018-04-09&#39;</span><span class="p">,</span> <span class="n">periods</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;2D&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span> <span class="n">index</span><span class="o">=</span><span class="n">i</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span>
<span class="go"> A</span>
<span class="go">2018-04-09 1</span>
<span class="go">2018-04-11 2</span>
<span class="go">2018-04-13 3</span>
<span class="go">2018-04-15 4</span>
<span class="go">Get the rows for the last 3 days:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">ts</span><span class="o">.</span><span class="n">last</span><span class="p">(</span><span class="s1">&#39;3D&#39;</span><span class="p">)</span>
<span class="go"> A</span>
<span class="go">2018-04-13 3</span>
<span class="go">2018-04-15 4</span>
<span class="go">Notice the data for the last 3 calendar days were returned, not the last</span>
<span class="go">3 observed days in the dataset, and therefore data for 2018-04-11 was</span>
<span class="go">not returned.</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.le">
<code class="descname">le</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.le" title="Permalink to this definition"></a></dt>
<dd><p>Get Less than or equal to of dataframe and other, element-wise (binary operator <cite>le</cite>).</p>
<p>Among flexible wrappers (<cite>eq</cite>, <cite>ne</cite>, <cite>le</cite>, <cite>lt</cite>, <cite>ge</cite>, <cite>gt</cite>) to comparison
operators.</p>
<p>Equivalent to <cite>==</cite>, <cite>!=</cite>, <cite>&lt;=</cite>, <cite>&lt;</cite>, <cite>&gt;=</cite>, <cite>&gt;</cite> with support to choose axis
(rows or columns) and level for comparison.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 'columns'</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’).</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the passed
MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the comparison.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">DeferredDataFrame of bool</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eq" title="apache_beam.dataframe.frames.DeferredDataFrame.eq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.eq()</span></code></a></dt>
<dd>Compare DeferredDataFrames for equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.ne" title="apache_beam.dataframe.frames.DeferredDataFrame.ne"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.ne()</span></code></a></dt>
<dd>Compare DeferredDataFrames for inequality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.le" title="apache_beam.dataframe.frames.DeferredDataFrame.le"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.le()</span></code></a></dt>
<dd>Compare DeferredDataFrames for less than inequality or equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.lt" title="apache_beam.dataframe.frames.DeferredDataFrame.lt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.lt()</span></code></a></dt>
<dd>Compare DeferredDataFrames for strictly less than inequality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.ge" title="apache_beam.dataframe.frames.DeferredDataFrame.ge"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.ge()</span></code></a></dt>
<dd>Compare DeferredDataFrames for greater than inequality or equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.gt" title="apache_beam.dataframe.frames.DeferredDataFrame.gt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.gt()</span></code></a></dt>
<dd>Compare DeferredDataFrames for strictly greater than inequality elementwise.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.
<cite>NaN</cite> values are considered different (i.e. <cite>NaN</cite> != <cite>NaN</cite>).</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;cost&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">300</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> cost revenue</span>
<span class="go">A 250 100</span>
<span class="go">B 150 250</span>
<span class="go">C 100 300</span>
<span class="go">Comparison with a scalar, using either the operator or method:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">==</span> <span class="mi">100</span>
<span class="go"> cost revenue</span>
<span class="go">A False True</span>
<span class="go">B False False</span>
<span class="go">C True False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eq</span><span class="p">(</span><span class="mi">100</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A False True</span>
<span class="go">B False False</span>
<span class="go">C True False</span>
<span class="go">When `other` is a :class:`Series`, the columns of a DataFrame are aligned</span>
<span class="go">with the index of `other` and broadcast:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">!=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;cost&quot;</span><span class="p">,</span> <span class="s2">&quot;revenue&quot;</span><span class="p">])</span>
<span class="go"> cost revenue</span>
<span class="go">A True True</span>
<span class="go">B True False</span>
<span class="go">C False True</span>
<span class="go">Use the method to control the broadcast axis:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">ne</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">100</span><span class="p">,</span> <span class="mi">300</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;A&quot;</span><span class="p">,</span> <span class="s2">&quot;D&quot;</span><span class="p">]),</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A True False</span>
<span class="go">B True True</span>
<span class="go">C True True</span>
<span class="go">D True True</span>
<span class="go">When comparing to an arbitrary sequence, the number of columns must</span>
<span class="go">match the number of elements in `other`:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">==</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">]</span>
<span class="go"> cost revenue</span>
<span class="go">A True True</span>
<span class="go">B False False</span>
<span class="go">C False False</span>
<span class="go">Use the method to control the axis:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eq</span><span class="p">([</span><span class="mi">250</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A True False</span>
<span class="go">B False True</span>
<span class="go">C True False</span>
<span class="go">Compare to a DataFrame of different shape.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">300</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">150</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;D&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> revenue</span>
<span class="go">A 300</span>
<span class="go">B 250</span>
<span class="go">C 100</span>
<span class="go">D 150</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">gt</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A False False</span>
<span class="go">B False False</span>
<span class="go">C False True</span>
<span class="go">D False False</span>
<span class="go">Compare to a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;cost&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">300</span><span class="p">,</span> <span class="mi">220</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">300</span><span class="p">,</span> <span class="mi">200</span><span class="p">,</span> <span class="mi">175</span><span class="p">,</span> <span class="mi">225</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> cost revenue</span>
<span class="go">Q1 A 250 100</span>
<span class="go"> B 150 250</span>
<span class="go"> C 100 300</span>
<span class="go">Q2 A 150 200</span>
<span class="go"> B 300 175</span>
<span class="go"> C 220 225</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">le</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">Q1 A True True</span>
<span class="go"> B True True</span>
<span class="go"> C True True</span>
<span class="go">Q2 A False True</span>
<span class="go"> B True False</span>
<span class="go"> C True False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.length">
<code class="descname">length</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.length" title="Permalink to this definition"></a></dt>
<dd><p>Alternative to <code class="docutils literal notranslate"><span class="pre">len(df)</span></code> which returns a deferred result that can be
used in arithmetic with <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredSeries</span></code></a> or
<a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredDataFrame</span></code></a> instances.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.loc">
<code class="descname">loc</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.loc" title="Permalink to this definition"></a></dt>
<dd><p>Access a group of rows and columns by label(s) or a boolean array.</p>
<p><code class="docutils literal notranslate"><span class="pre">.loc[]</span></code> is primarily label based, but may also be used with a
boolean array.</p>
<p>Allowed inputs are:</p>
<ul>
<li><p class="first">A single label, e.g. <code class="docutils literal notranslate"><span class="pre">5</span></code> or <code class="docutils literal notranslate"><span class="pre">'a'</span></code>, (note that <code class="docutils literal notranslate"><span class="pre">5</span></code> is
interpreted as a <em>label</em> of the index, and <strong>never</strong> as an
integer position along the index).</p>
</li>
<li><p class="first">A list or array of labels, e.g. <code class="docutils literal notranslate"><span class="pre">['a',</span> <span class="pre">'b',</span> <span class="pre">'c']</span></code>.</p>
</li>
<li><p class="first">A slice object with labels, e.g. <code class="docutils literal notranslate"><span class="pre">'a':'f'</span></code>.</p>
<div class="admonition warning">
<p class="first admonition-title">Warning</p>
<p class="last">Note that, contrary to usual Python slices, <strong>both</strong> the
start and the stop are included.</p>
</div>
</li>
<li><p class="first">A boolean array of the same length as the axis being sliced,
e.g. <code class="docutils literal notranslate"><span class="pre">[True,</span> <span class="pre">False,</span> <span class="pre">True]</span></code>.</p>
</li>
<li><p class="first">An alignable boolean Series. The index of the key will be aligned before
masking.</p>
</li>
<li><p class="first">An alignable Index. The Index of the returned selection will be the input.</p>
</li>
<li><p class="first">A <code class="docutils literal notranslate"><span class="pre">callable</span></code> function with one argument (the calling Series or
DataFrame) that returns valid output for indexing (one of the above).</p>
</li>
</ul>
<p>See more at <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/indexing.html#indexing-label" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">Selection by Label</span></a>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Raises:</th><td class="field-body"><ul class="first last simple">
<li><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#KeyError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">KeyError</span></code></a> – If any items are not found.</li>
<li><code class="xref py py-exc docutils literal notranslate"><span class="pre">IndexingError</span></code> – If an indexed key is passed and its index is unalignable to the frame index.</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.at" title="apache_beam.dataframe.frames.DeferredDataFrame.at"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.at</span></code></a></dt>
<dd>Access a single value for a row/column label pair.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.iloc" title="apache_beam.dataframe.frames.DeferredDataFrame.iloc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.iloc</span></code></a></dt>
<dd>Access group of rows and columns by integer position(s).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.xs" title="apache_beam.dataframe.frames.DeferredDataFrame.xs"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredDataFrame.xs</span></code></a></dt>
<dd>Returns a cross-section (row(s) or column(s)) from the DeferredSeries/DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.loc" title="apache_beam.dataframe.frames.DeferredSeries.loc"><code class="xref py py-obj docutils literal notranslate"><span class="pre">DeferredSeries.loc</span></code></a></dt>
<dd>Access group of values using labels.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">**Getting values**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">],</span> <span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;sidewinder&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;max_speed&#39;</span><span class="p">,</span> <span class="s1">&#39;shield&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra 1 2</span>
<span class="go">viper 4 5</span>
<span class="go">sidewinder 7 8</span>
<span class="go">Single label. Note this returns the row as a Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;viper&#39;</span><span class="p">]</span>
<span class="go">max_speed 4</span>
<span class="go">shield 5</span>
<span class="go">Name: viper, dtype: int64</span>
<span class="go">List of labels. Note using ``[[]]`` returns a DataFrame.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[[</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;sidewinder&#39;</span><span class="p">]]</span>
<span class="go"> max_speed shield</span>
<span class="go">viper 4 5</span>
<span class="go">sidewinder 7 8</span>
<span class="go">Single label for row and column</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;shield&#39;</span><span class="p">]</span>
<span class="go">2</span>
<span class="go">Slice with labels for row and single label for column. As mentioned</span>
<span class="go">above, note that both the start and stop of the slice are included.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;cobra&#39;</span><span class="p">:</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;max_speed&#39;</span><span class="p">]</span>
<span class="go">cobra 1</span>
<span class="go">viper 4</span>
<span class="go">Name: max_speed, dtype: int64</span>
<span class="go">Boolean list with the same length as the row axis</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[[</span><span class="kc">False</span><span class="p">,</span> <span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">]]</span>
<span class="go"> max_speed shield</span>
<span class="go">sidewinder 7 8</span>
<span class="go">Alignable boolean Series:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="kc">False</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;sidewinder&#39;</span><span class="p">,</span> <span class="s1">&#39;cobra&#39;</span><span class="p">])]</span>
<span class="go"> max_speed shield</span>
<span class="go">sidewinder 7 8</span>
<span class="go">Index (same behavior as ``df.reindex``)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">pd</span><span class="o">.</span><span class="n">Index</span><span class="p">([</span><span class="s2">&quot;cobra&quot;</span><span class="p">,</span> <span class="s2">&quot;viper&quot;</span><span class="p">],</span> <span class="n">name</span><span class="o">=</span><span class="s2">&quot;foo&quot;</span><span class="p">)]</span>
<span class="go"> max_speed shield</span>
<span class="go">foo</span>
<span class="go">cobra 1 2</span>
<span class="go">viper 4 5</span>
<span class="go">Conditional that returns a boolean Series</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;shield&#39;</span><span class="p">]</span> <span class="o">&gt;</span> <span class="mi">6</span><span class="p">]</span>
<span class="go"> max_speed shield</span>
<span class="go">sidewinder 7 8</span>
<span class="go">Conditional that returns a boolean Series with column labels specified</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;shield&#39;</span><span class="p">]</span> <span class="o">&gt;</span> <span class="mi">6</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;max_speed&#39;</span><span class="p">]]</span>
<span class="go"> max_speed</span>
<span class="go">sidewinder 7</span>
<span class="go">Callable that returns a boolean Series</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="k">lambda</span> <span class="n">df</span><span class="p">:</span> <span class="n">df</span><span class="p">[</span><span class="s1">&#39;shield&#39;</span><span class="p">]</span> <span class="o">==</span> <span class="mi">8</span><span class="p">]</span>
<span class="go"> max_speed shield</span>
<span class="go">sidewinder 7 8</span>
<span class="go">**Setting values**</span>
<span class="go">Set value for all items matching the list of labels</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[[</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;sidewinder&#39;</span><span class="p">],</span> <span class="p">[</span><span class="s1">&#39;shield&#39;</span><span class="p">]]</span> <span class="o">=</span> <span class="mi">50</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra 1 2</span>
<span class="go">viper 4 50</span>
<span class="go">sidewinder 7 50</span>
<span class="go">Set value for an entire row</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;cobra&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="mi">10</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra 10 10</span>
<span class="go">viper 4 50</span>
<span class="go">sidewinder 7 50</span>
<span class="go">Set value for an entire column</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[:,</span> <span class="s1">&#39;max_speed&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="mi">30</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra 30 10</span>
<span class="go">viper 30 50</span>
<span class="go">sidewinder 30 50</span>
<span class="go">Set value for rows matching a boolean condition</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;shield&#39;</span><span class="p">]</span> <span class="o">&gt;</span> <span class="mi">35</span><span class="p">]</span> <span class="o">=</span> <span class="mi">0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra 30 10</span>
<span class="go">viper 0 0</span>
<span class="go">sidewinder 0 0</span>
<span class="go">**Getting values on a DataFrame with an index that has integer labels**</span>
<span class="go">Another example using integers for the index</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">],</span> <span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">],</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;max_speed&#39;</span><span class="p">,</span> <span class="s1">&#39;shield&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">7 1 2</span>
<span class="go">8 4 5</span>
<span class="go">9 7 8</span>
<span class="go">Slice with integer labels for rows. As mentioned above, note that both</span>
<span class="go">the start and stop of the slice are included.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="mi">7</span><span class="p">:</span><span class="mi">9</span><span class="p">]</span>
<span class="go"> max_speed shield</span>
<span class="go">7 1 2</span>
<span class="go">8 4 5</span>
<span class="go">9 7 8</span>
<span class="go">**Getting values with a MultiIndex**</span>
<span class="go">A number of examples using a DataFrame with a MultiIndex</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">tuples</span> <span class="o">=</span> <span class="p">[</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark i&#39;</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark ii&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;sidewinder&#39;</span><span class="p">,</span> <span class="s1">&#39;mark i&#39;</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;sidewinder&#39;</span><span class="p">,</span> <span class="s1">&#39;mark ii&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;mark ii&#39;</span><span class="p">),</span> <span class="p">(</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;mark iii&#39;</span><span class="p">)</span>
<span class="gp">... </span><span class="p">]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">index</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_tuples</span><span class="p">(</span><span class="n">tuples</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">values</span> <span class="o">=</span> <span class="p">[[</span><span class="mi">12</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="p">[</span><span class="mi">10</span><span class="p">,</span> <span class="mi">20</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span> <span class="p">[</span><span class="mi">7</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="p">[</span><span class="mi">16</span><span class="p">,</span> <span class="mi">36</span><span class="p">]]</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">values</span><span class="p">,</span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;max_speed&#39;</span><span class="p">,</span> <span class="s1">&#39;shield&#39;</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra mark i 12 2</span>
<span class="go"> mark ii 0 4</span>
<span class="go">sidewinder mark i 10 20</span>
<span class="go"> mark ii 1 4</span>
<span class="go">viper mark ii 7 1</span>
<span class="go"> mark iii 16 36</span>
<span class="go">Single label. Note this returns a DataFrame with a single index.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;cobra&#39;</span><span class="p">]</span>
<span class="go"> max_speed shield</span>
<span class="go">mark i 12 2</span>
<span class="go">mark ii 0 4</span>
<span class="go">Single index tuple. Note this returns a Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark ii&#39;</span><span class="p">)]</span>
<span class="go">max_speed 0</span>
<span class="go">shield 4</span>
<span class="go">Name: (cobra, mark ii), dtype: int64</span>
<span class="go">Single label for row and column. Similar to passing in a tuple, this</span>
<span class="go">returns a Series.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark i&#39;</span><span class="p">]</span>
<span class="go">max_speed 12</span>
<span class="go">shield 2</span>
<span class="go">Name: (cobra, mark i), dtype: int64</span>
<span class="go">Single tuple. Note using ``[[]]`` returns a DataFrame.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[[(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark ii&#39;</span><span class="p">)]]</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra mark ii 0 4</span>
<span class="go">Single tuple for the index with a single label for the column</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark i&#39;</span><span class="p">),</span> <span class="s1">&#39;shield&#39;</span><span class="p">]</span>
<span class="go">2</span>
<span class="go">Slice from index tuple to single label</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark i&#39;</span><span class="p">):</span><span class="s1">&#39;viper&#39;</span><span class="p">]</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra mark i 12 2</span>
<span class="go"> mark ii 0 4</span>
<span class="go">sidewinder mark i 10 20</span>
<span class="go"> mark ii 1 4</span>
<span class="go">viper mark ii 7 1</span>
<span class="go"> mark iii 16 36</span>
<span class="go">Slice from index tuple to index tuple</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[(</span><span class="s1">&#39;cobra&#39;</span><span class="p">,</span> <span class="s1">&#39;mark i&#39;</span><span class="p">):(</span><span class="s1">&#39;viper&#39;</span><span class="p">,</span> <span class="s1">&#39;mark ii&#39;</span><span class="p">)]</span>
<span class="go"> max_speed shield</span>
<span class="go">cobra mark i 12 2</span>
<span class="go"> mark ii 0 4</span>
<span class="go">sidewinder mark i 10 20</span>
<span class="go"> mark ii 1 4</span>
<span class="go">viper mark ii 7 1</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.lt">
<code class="descname">lt</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.lt" title="Permalink to this definition"></a></dt>
<dd><p>Return whether dataframe is less than other, element-wise (binary operator <cite>lt</cite>).</p>
<p>Among flexible wrappers (<cite>eq</cite>, <cite>ne</cite>, <cite>le</cite>, <cite>lt</cite>, <cite>ge</cite>, <cite>gt</cite>) to comparison
operators.</p>
<p>Equivalent to <cite>==</cite>, <cite>!=</cite>, <cite>&lt;=</cite>, <cite>&lt;</cite>, <cite>&gt;=</cite>, <cite>&gt;</cite> with support for choosing the axis
(rows or columns) and level for comparison.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 'columns'</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’).</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the passed
MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the comparison.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">DeferredDataFrame of bool</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eq" title="apache_beam.dataframe.frames.DeferredDataFrame.eq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.eq()</span></code></a></dt>
<dd>Compare DeferredDataFrames for equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.ne" title="apache_beam.dataframe.frames.DeferredDataFrame.ne"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.ne()</span></code></a></dt>
<dd>Compare DeferredDataFrames for inequality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.le" title="apache_beam.dataframe.frames.DeferredDataFrame.le"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.le()</span></code></a></dt>
<dd>Compare DeferredDataFrames for less than inequality or equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.lt" title="apache_beam.dataframe.frames.DeferredDataFrame.lt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.lt()</span></code></a></dt>
<dd>Compare DeferredDataFrames for strictly less than inequality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.ge" title="apache_beam.dataframe.frames.DeferredDataFrame.ge"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.ge()</span></code></a></dt>
<dd>Compare DeferredDataFrames for greater than inequality or equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.gt" title="apache_beam.dataframe.frames.DeferredDataFrame.gt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.gt()</span></code></a></dt>
<dd>Compare DeferredDataFrames for strictly greater than inequality elementwise.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.
<cite>NaN</cite> values are considered different (i.e. <cite>NaN</cite> != <cite>NaN</cite>).</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;cost&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">300</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> cost revenue</span>
<span class="go">A 250 100</span>
<span class="go">B 150 250</span>
<span class="go">C 100 300</span>
<span class="go">Comparison with a scalar, using either the operator or method:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">==</span> <span class="mi">100</span>
<span class="go"> cost revenue</span>
<span class="go">A False True</span>
<span class="go">B False False</span>
<span class="go">C True False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eq</span><span class="p">(</span><span class="mi">100</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A False True</span>
<span class="go">B False False</span>
<span class="go">C True False</span>
<span class="go">When `other` is a `Series`, the columns of a DataFrame are aligned</span>
<span class="go">with the index of `other` and broadcast:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">!=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;cost&quot;</span><span class="p">,</span> <span class="s2">&quot;revenue&quot;</span><span class="p">])</span>
<span class="go"> cost revenue</span>
<span class="go">A True True</span>
<span class="go">B True False</span>
<span class="go">C False True</span>
<span class="go">Use the method to control the broadcast axis:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">ne</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">100</span><span class="p">,</span> <span class="mi">300</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;A&quot;</span><span class="p">,</span> <span class="s2">&quot;D&quot;</span><span class="p">]),</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A True False</span>
<span class="go">B True True</span>
<span class="go">C True True</span>
<span class="go">D True True</span>
<span class="go">When comparing to an arbitrary sequence, the number of columns must</span>
<span class="go">match the number of elements in `other`:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">==</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">]</span>
<span class="go"> cost revenue</span>
<span class="go">A True True</span>
<span class="go">B False False</span>
<span class="go">C False False</span>
<span class="go">Use the method to control the axis:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eq</span><span class="p">([</span><span class="mi">250</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A True False</span>
<span class="go">B False True</span>
<span class="go">C True False</span>
<span class="go">Compare to a DataFrame of different shape.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">300</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">150</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;D&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> revenue</span>
<span class="go">A 300</span>
<span class="go">B 250</span>
<span class="go">C 100</span>
<span class="go">D 150</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">gt</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A False False</span>
<span class="go">B False False</span>
<span class="go">C False True</span>
<span class="go">D False False</span>
<span class="go">Compare to a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;cost&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">300</span><span class="p">,</span> <span class="mi">220</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">300</span><span class="p">,</span> <span class="mi">200</span><span class="p">,</span> <span class="mi">175</span><span class="p">,</span> <span class="mi">225</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> cost revenue</span>
<span class="go">Q1 A 250 100</span>
<span class="go"> B 150 250</span>
<span class="go"> C 100 300</span>
<span class="go">Q2 A 150 200</span>
<span class="go"> B 300 175</span>
<span class="go"> C 220 225</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">le</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">Q1 A True True</span>
<span class="go"> B True True</span>
<span class="go"> C True True</span>
<span class="go">Q2 A False True</span>
<span class="go"> B True False</span>
<span class="go"> C True False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.mask">
<code class="descname">mask</code><span class="sig-paren">(</span><em>cond</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.mask" title="Permalink to this definition"></a></dt>
<dd><p>mask is not parallelizable when <code class="docutils literal notranslate"><span class="pre">errors=&quot;ignore&quot;</span></code> is specified.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.mod">
<code class="descname">mod</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="Permalink to this definition"></a></dt>
<dd><p>Get Modulo of dataframe and other, element-wise (binary operator <cite>mod</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">dataframe</span> <span class="pre">%</span> <span class="pre">other</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. With reverse version, <cite>rmod</cite>.</p>
<p>Among flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) to
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with operator version which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.mul">
<code class="descname">mul</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="Permalink to this definition"></a></dt>
<dd><p>Get Multiplication of dataframe and other, element-wise (binary operator <cite>mul</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">dataframe</span> <span class="pre">*</span> <span class="pre">other</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. With reverse version, <cite>rmul</cite>.</p>
<p>Among flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) to
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with operator version which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.multiply">
<code class="descname">multiply</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.multiply" title="Permalink to this definition"></a></dt>
<dd><p>Get the multiplication of dataframe and other, element-wise (binary operator <cite>mul</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">dataframe</span> <span class="pre">*</span> <span class="pre">other</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. With reverse version, <cite>rmul</cite>.</p>
<p>Among flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) to
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with operator version which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.ndim">
<code class="descname">ndim</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.ndim" title="Permalink to this definition"></a></dt>
<dd><p>Return an int representing the number of axes / array dimensions.</p>
<p>Return 1 if Series. Otherwise return 2 if DataFrame.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">ndarray.ndim</span></code></dt>
<dd>Number of array dimensions.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">({</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="mi">3</span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">ndim</span>
<span class="go">1</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">ndim</span>
<span class="go">2</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.ne">
<code class="descname">ne</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.ne" title="Permalink to this definition"></a></dt>
<dd><p>Get the not-equal-to comparison of dataframe and other, element-wise (binary operator <cite>ne</cite>).</p>
<p>Among flexible wrappers (<cite>eq</cite>, <cite>ne</cite>, <cite>le</cite>, <cite>lt</cite>, <cite>ge</cite>, <cite>gt</cite>) to comparison
operators.</p>
<p>Equivalent to <cite>==</cite>, <cite>!=</cite>, <cite>&lt;=</cite>, <cite>&lt;</cite>, <cite>&gt;=</cite>, <cite>&gt;</cite> with support to choose axis
(rows or columns) and level for comparison.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 'columns'</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’).</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the passed
MultiIndex level.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the comparison.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">DeferredDataFrame of bool</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.eq" title="apache_beam.dataframe.frames.DeferredDataFrame.eq"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.eq()</span></code></a></dt>
<dd>Compare DeferredDataFrames for equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.ne" title="apache_beam.dataframe.frames.DeferredDataFrame.ne"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.ne()</span></code></a></dt>
<dd>Compare DeferredDataFrames for inequality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.le" title="apache_beam.dataframe.frames.DeferredDataFrame.le"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.le()</span></code></a></dt>
<dd>Compare DeferredDataFrames for less than inequality or equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.lt" title="apache_beam.dataframe.frames.DeferredDataFrame.lt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.lt()</span></code></a></dt>
<dd>Compare DeferredDataFrames for strictly less than inequality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.ge" title="apache_beam.dataframe.frames.DeferredDataFrame.ge"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.ge()</span></code></a></dt>
<dd>Compare DeferredDataFrames for greater than inequality or equality elementwise.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.gt" title="apache_beam.dataframe.frames.DeferredDataFrame.gt"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.gt()</span></code></a></dt>
<dd>Compare DeferredDataFrames for strictly greater than inequality elementwise.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.
<cite>NaN</cite> values are considered different (i.e. <cite>NaN</cite> != <cite>NaN</cite>).</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;cost&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">300</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> cost revenue</span>
<span class="go">A 250 100</span>
<span class="go">B 150 250</span>
<span class="go">C 100 300</span>
<span class="go">Comparison with a scalar, using either the operator or method:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">==</span> <span class="mi">100</span>
<span class="go"> cost revenue</span>
<span class="go">A False True</span>
<span class="go">B False False</span>
<span class="go">C True False</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eq</span><span class="p">(</span><span class="mi">100</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A False True</span>
<span class="go">B False False</span>
<span class="go">C True False</span>
<span class="go">When `other` is a :class:`Series`, the columns of a DataFrame are aligned</span>
<span class="go">with the index of `other` and broadcast:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">!=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;cost&quot;</span><span class="p">,</span> <span class="s2">&quot;revenue&quot;</span><span class="p">])</span>
<span class="go"> cost revenue</span>
<span class="go">A True True</span>
<span class="go">B True False</span>
<span class="go">C False True</span>
<span class="go">Use the method to control the broadcast axis:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">ne</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">100</span><span class="p">,</span> <span class="mi">300</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;A&quot;</span><span class="p">,</span> <span class="s2">&quot;D&quot;</span><span class="p">]),</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A True False</span>
<span class="go">B True True</span>
<span class="go">C True True</span>
<span class="go">D True True</span>
<span class="go">When comparing to an arbitrary sequence, the number of columns must</span>
<span class="go">match the number of elements in `other`:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">==</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">]</span>
<span class="go"> cost revenue</span>
<span class="go">A True True</span>
<span class="go">B False False</span>
<span class="go">C False False</span>
<span class="go">Use the method to control the axis:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">eq</span><span class="p">([</span><span class="mi">250</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A True False</span>
<span class="go">B False True</span>
<span class="go">C True False</span>
<span class="go">Compare to a DataFrame of different shape.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">300</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">150</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;D&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> revenue</span>
<span class="go">A 300</span>
<span class="go">B 250</span>
<span class="go">C 100</span>
<span class="go">D 150</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">gt</span><span class="p">(</span><span class="n">other</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">A False False</span>
<span class="go">B False False</span>
<span class="go">C False True</span>
<span class="go">D False False</span>
<span class="go">Compare to a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;cost&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">250</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">300</span><span class="p">,</span> <span class="mi">220</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;revenue&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">250</span><span class="p">,</span> <span class="mi">300</span><span class="p">,</span> <span class="mi">200</span><span class="p">,</span> <span class="mi">175</span><span class="p">,</span> <span class="mi">225</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q1&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">,</span> <span class="s1">&#39;Q2&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> cost revenue</span>
<span class="go">Q1 A 250 100</span>
<span class="go"> B 150 250</span>
<span class="go"> C 100 300</span>
<span class="go">Q2 A 150 200</span>
<span class="go"> B 300 175</span>
<span class="go"> C 220 225</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">le</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> cost revenue</span>
<span class="go">Q1 A True True</span>
<span class="go"> B True True</span>
<span class="go"> C True True</span>
<span class="go">Q2 A False True</span>
<span class="go"> B True False</span>
<span class="go"> C True False</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.pad">
<code class="descname">pad</code><span class="sig-paren">(</span><em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.pad" title="Permalink to this definition"></a></dt>
<dd><p>Synonym for <code class="xref py py-meth docutils literal notranslate"><span class="pre">DataFrame.fillna()</span></code> with <code class="docutils literal notranslate"><span class="pre">method='ffill'</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Returns:</th><td class="field-body">Object with missing values filled or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</td>
</tr>
<tr class="field-even field"><th class="field-name">Return type:</th><td class="field-body">DeferredSeries/DeferredDataFrame or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>pad is only supported for axis="columns". axis="index" is order-sensitive.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.pipe">
<code class="descname">pipe</code><span class="sig-paren">(</span><em>func</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.pipe" title="Permalink to this definition"></a></dt>
<dd><p>Apply func(self, *args, **kwargs).</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>func</strong> (<em>function</em>) – Function to apply to the DeferredSeries/DeferredDataFrame.
<code class="docutils literal notranslate"><span class="pre">args</span></code>, and <code class="docutils literal notranslate"><span class="pre">kwargs</span></code> are passed into <code class="docutils literal notranslate"><span class="pre">func</span></code>.
Alternatively a <code class="docutils literal notranslate"><span class="pre">(callable,</span> <span class="pre">data_keyword)</span></code> tuple where
<code class="docutils literal notranslate"><span class="pre">data_keyword</span></code> is a string indicating the keyword of
<code class="docutils literal notranslate"><span class="pre">callable</span></code> that expects the DeferredSeries/DeferredDataFrame.</li>
<li><strong>args</strong> (<em>iterable</em><em>, </em><em>optional</em>) – Positional arguments passed into <code class="docutils literal notranslate"><span class="pre">func</span></code>.</li>
<li><strong>kwargs</strong> (<em>mapping</em><em>, </em><em>optional</em>) – A dictionary of keyword arguments passed into <code class="docutils literal notranslate"><span class="pre">func</span></code>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"><strong>object</strong></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">the return type of <code class="docutils literal notranslate"><span class="pre">func</span></code>.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.apply" title="apache_beam.dataframe.frames.DeferredDataFrame.apply"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.apply()</span></code></a></dt>
<dd>Apply a function along input axis of DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.applymap" title="apache_beam.dataframe.frames.DeferredDataFrame.applymap"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.applymap()</span></code></a></dt>
<dd>Apply a function elementwise on a whole DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.map" title="apache_beam.dataframe.frames.DeferredSeries.map"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.map()</span></code></a></dt>
<dd>Apply a mapping correspondence on a <code class="xref py py-class docutils literal notranslate"><span class="pre">DeferredSeries</span></code>.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Use <code class="docutils literal notranslate"><span class="pre">.pipe</span></code> when chaining together functions that expect
DeferredSeries, DeferredDataFrames or GroupBy objects. Instead of writing</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">func</span><span class="p">(</span><span class="n">g</span><span class="p">(</span><span class="n">h</span><span class="p">(</span><span class="n">df</span><span class="p">),</span> <span class="n">arg1</span><span class="o">=</span><span class="n">a</span><span class="p">),</span> <span class="n">arg2</span><span class="o">=</span><span class="n">b</span><span class="p">,</span> <span class="n">arg3</span><span class="o">=</span><span class="n">c</span><span class="p">)</span> <span class="c1"># doctest: +SKIP</span>
</pre></div>
</div>
<p>You can write</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">pipe</span><span class="p">(</span><span class="n">h</span><span class="p">)</span>
<span class="gp">... </span> <span class="o">.</span><span class="n">pipe</span><span class="p">(</span><span class="n">g</span><span class="p">,</span> <span class="n">arg1</span><span class="o">=</span><span class="n">a</span><span class="p">)</span>
<span class="gp">... </span> <span class="o">.</span><span class="n">pipe</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">arg2</span><span class="o">=</span><span class="n">b</span><span class="p">,</span> <span class="n">arg3</span><span class="o">=</span><span class="n">c</span><span class="p">)</span>
<span class="gp">... </span><span class="p">)</span> <span class="c1"># doctest: +SKIP</span>
</pre></div>
</div>
<p>If you have a function that takes the data as (say) the second
argument, pass a tuple indicating which keyword expects the
data. For example, suppose <code class="docutils literal notranslate"><span class="pre">f</span></code> takes its data as <code class="docutils literal notranslate"><span class="pre">arg2</span></code>:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">pipe</span><span class="p">(</span><span class="n">h</span><span class="p">)</span>
<span class="gp">... </span> <span class="o">.</span><span class="n">pipe</span><span class="p">(</span><span class="n">g</span><span class="p">,</span> <span class="n">arg1</span><span class="o">=</span><span class="n">a</span><span class="p">)</span>
<span class="gp">... </span> <span class="o">.</span><span class="n">pipe</span><span class="p">((</span><span class="n">func</span><span class="p">,</span> <span class="s1">&#39;arg2&#39;</span><span class="p">),</span> <span class="n">arg1</span><span class="o">=</span><span class="n">a</span><span class="p">,</span> <span class="n">arg3</span><span class="o">=</span><span class="n">c</span><span class="p">)</span>
<span class="gp">... </span> <span class="p">)</span> <span class="c1"># doctest: +SKIP</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.pivot">
<code class="descname">pivot</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.pivot" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.pivot.html#pandas.DataFrame.pivot" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.pivot()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘pivot’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.pivot_table">
<code class="descname">pivot_table</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.pivot_table" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.pivot_table.html#pandas.DataFrame.pivot_table" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.pivot_table()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘pivot_table’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.pow">
<code class="descname">pow</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="Permalink to this definition"></a></dt>
<dd><p>Get Exponential power of dataframe and other, element-wise (binary operator <cite>pow</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">dataframe</span> <span class="pre">**</span> <span class="pre">other</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. With reverse version, <cite>rpow</cite>.</p>
<p>Among flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) to
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with operator version which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.radd">
<code class="descname">radd</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.radd" title="Permalink to this definition"></a></dt>
<dd><p>Get Addition of dataframe and other, element-wise (binary operator <cite>radd</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">+</span> <span class="pre">dataframe</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. This is the reverse version of <cite>add</cite>.</p>
<p>One of the flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>floordiv</cite>, <cite>mod</cite>, <cite>pow</cite>) around the
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with the operator version, which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.rank">
<code class="descname">rank</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.rank" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.rank.html#pandas.DataFrame.rank" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.rank()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘rank’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.rdiv">
<code class="descname">rdiv</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.rdiv" title="Permalink to this definition"></a></dt>
<dd><p>Get Floating division of dataframe and other, element-wise (binary operator <cite>rtruediv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">/</span> <span class="pre">dataframe</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. This is the reverse version of <cite>truediv</cite>.</p>
<p>One of the flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>floordiv</cite>, <cite>mod</cite>, <cite>pow</cite>) around the
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with the operator version, which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.reindex">
<code class="descname">reindex</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.reindex" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.reindex.html#pandas.DataFrame.reindex" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.reindex()</span></code></a> is not yet supported in the Beam DataFrame API because it is sensitive to the order of the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.reindex_like">
<code class="descname">reindex_like</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.reindex_like" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.reindex_like.html#pandas.DataFrame.reindex_like" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.reindex_like()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘reindex_like’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.reorder_levels">
<code class="descname">reorder_levels</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.reorder_levels" title="Permalink to this definition"></a></dt>
<dd><p>Rearrange index levels using input order. May not drop or duplicate levels.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>order</strong> (<em>list of int</em><em> or </em><em>list of str</em>) – List representing new level order. Reference level by number
(position) or by key (label).</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – Where to reorder levels.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.replace">
<code class="descname">replace</code><span class="sig-paren">(</span><em>to_replace</em>, <em>value</em>, <em>limit</em>, <em>method</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.replace" title="Permalink to this definition"></a></dt>
<dd><p>Replace values given in <cite>to_replace</cite> with <cite>value</cite>.</p>
<p>Values of the DataFrame are replaced with other values dynamically.</p>
<p>This differs from updating with <code class="docutils literal notranslate"><span class="pre">.loc</span></code> or <code class="docutils literal notranslate"><span class="pre">.iloc</span></code>, which require
you to specify a location to update with some value.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>to_replace</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>regex</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a>) – <p>How to find the values that will be replaced.</p>
<ul>
<li>numeric, str or regex:<blockquote>
<div><ul>
<li><dl class="first docutils">
<dt>numeric: numeric values equal to <cite>to_replace</cite> will be</dt>
<dd>replaced with <cite>value</cite></dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>str: string exactly matching <cite>to_replace</cite> will be replaced</dt>
<dd>with <cite>value</cite></dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>regex: regexes matching <cite>to_replace</cite> will be replaced with</dt>
<dd><cite>value</cite></dd>
</dl>
</li>
</ul>
</div></blockquote>
</li>
<li>list of str, regex, or numeric:<blockquote>
<div><ul>
<li><dl class="first docutils">
<dt>First, if <cite>to_replace</cite> and <cite>value</cite> are both lists, they</dt>
<dd><strong>must</strong> be the same length.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>Second, if <code class="docutils literal notranslate"><span class="pre">regex=True</span></code> then all of the strings in <strong>both</strong></dt>
<dd>lists will be interpreted as regexes; otherwise they will match
directly. This doesn’t matter much for <cite>value</cite> since there
are only a few possible substitution regexes you can use.</dd>
</dl>
</li>
<li>str, regex and numeric rules apply as above.</li>
</ul>
</div></blockquote>
</li>
<li>dict:<blockquote>
<div><ul>
<li><dl class="first docutils">
<dt>Dicts can be used to specify different replacement values</dt>
<dd>for different existing values. For example,
<code class="docutils literal notranslate"><span class="pre">{'a':</span> <span class="pre">'b',</span> <span class="pre">'y':</span> <span class="pre">'z'}</span></code> replaces the value ‘a’ with ‘b’ and
‘y’ with ‘z’. To use a dict in this way the <cite>value</cite>
parameter should be <cite>None</cite>.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>For a DeferredDataFrame a dict can specify that different values</dt>
<dd>should be replaced in different columns. For example,
<code class="docutils literal notranslate"><span class="pre">{'a':</span> <span class="pre">1,</span> <span class="pre">'b':</span> <span class="pre">'z'}</span></code> looks for the value 1 in column ‘a’
and the value ‘z’ in column ‘b’ and replaces these values
with whatever is specified in <cite>value</cite>. The <cite>value</cite> parameter
should not be <code class="docutils literal notranslate"><span class="pre">None</span></code> in this case. You can treat this as a
special case of passing two lists except that you are
specifying the column to search in.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>For a DeferredDataFrame nested dictionaries, e.g.,</dt>
<dd><code class="docutils literal notranslate"><span class="pre">{'a':</span> <span class="pre">{'b':</span> <span class="pre">np.nan}}</span></code>, are read as follows: look in column
‘a’ for the value ‘b’ and replace it with NaN. The <cite>value</cite>
parameter should be <code class="docutils literal notranslate"><span class="pre">None</span></code> to use a nested dict in this
way. You can nest regular expressions as well. Note that
column names (the top-level dictionary keys in a nested
dictionary) <strong>cannot</strong> be regular expressions.</dd>
</dl>
</li>
</ul>
</div></blockquote>
</li>
<li>None:<blockquote>
<div><ul>
<li><dl class="first docutils">
<dt>This means that the <cite>regex</cite> argument must be a string,</dt>
<dd>compiled regular expression, or list, dict, ndarray or
DeferredSeries of such elements. If <cite>value</cite> is also <code class="docutils literal notranslate"><span class="pre">None</span></code> then
this <strong>must</strong> be a nested dictionary or DeferredSeries.</dd>
</dl>
</li>
</ul>
</div></blockquote>
</li>
</ul>
<p>See the examples section for examples of each of these.</p>
</li>
<li><strong>value</strong> (<em>scalar</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>regex</em><em>, </em><em>default None</em>) – Value to replace any values matching <cite>to_replace</cite> with.
For a DeferredDataFrame a dict of values can be used to specify which
value to use for each column (columns not in the dict will not be
filled). Regular expressions, strings and lists or dicts of such
objects are also allowed.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, performs operation inplace and returns None.</li>
<li><strong>limit</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default None</em>) – Maximum size gap to forward or backward fill.</li>
<li><strong>regex</strong> (bool or same types as <cite>to_replace</cite>, default False) – Whether to interpret <cite>to_replace</cite> and/or <cite>value</cite> as regular
expressions. If this is <code class="docutils literal notranslate"><span class="pre">True</span></code> then <cite>to_replace</cite> <em>must</em> be a
string. Alternatively, this could be a regular expression or a
list, dict, or array of regular expressions in which case
<cite>to_replace</cite> must be <code class="docutils literal notranslate"><span class="pre">None</span></code>.</li>
<li><strong>method</strong> ({‘pad’, ‘ffill’, ‘bfill’, <cite>None</cite>}) – <p>The method to use for replacement, when <cite>to_replace</cite> is a
scalar, list or tuple and <cite>value</cite> is <code class="docutils literal notranslate"><span class="pre">None</span></code>.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 0.23.0: </span>Added to DeferredDataFrame.</p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Object after replacement.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><ul class="first last">
<li><dl class="first docutils">
<dt><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#AssertionError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">AssertionError</span></code></a> – If <cite>regex</cite> is not a <code class="docutils literal notranslate"><span class="pre">bool</span></code> and <cite>to_replace</cite> is not</dt>
<dd><p class="first last"><code class="docutils literal notranslate"><span class="pre">None</span></code>.</p>
</dd>
</dl>
</li>
<li><p class="first"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> –</p>
<ul class="simple">
<li>If <cite>to_replace</cite> is not a scalar, array-like, <code class="docutils literal notranslate"><span class="pre">dict</span></code>, or <code class="docutils literal notranslate"><span class="pre">None</span></code></li>
<li>If <cite>to_replace</cite> is a <code class="docutils literal notranslate"><span class="pre">dict</span></code> and <cite>value</cite> is not a <code class="docutils literal notranslate"><span class="pre">list</span></code>,
<code class="docutils literal notranslate"><span class="pre">dict</span></code>, <code class="docutils literal notranslate"><span class="pre">ndarray</span></code>, or <code class="docutils literal notranslate"><span class="pre">DeferredSeries</span></code></li>
<li><dl class="first docutils">
<dt>If <cite>to_replace</cite> is <code class="docutils literal notranslate"><span class="pre">None</span></code> and <cite>regex</cite> is not compilable</dt>
<dd>into a regular expression or is a list, dict, ndarray, or
DeferredSeries.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>When replacing multiple <code class="docutils literal notranslate"><span class="pre">bool</span></code> or <code class="docutils literal notranslate"><span class="pre">datetime64</span></code> objects and</dt>
<dd>the arguments to <cite>to_replace</cite> do not match the type of the
value being replaced</dd>
</dl>
</li>
</ul>
</li>
<li><dl class="first docutils">
<dt><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> – If a <code class="docutils literal notranslate"><span class="pre">list</span></code> or an <code class="docutils literal notranslate"><span class="pre">ndarray</span></code> is passed to <cite>to_replace</cite> and</dt>
<dd><p class="first last"><cite>value</cite> but they are not the same length.</p>
</dd>
</dl>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">method</span></code> is not supported in the Beam DataFrame API because it is
order-sensitive. It cannot be specified.</p>
<p>If <code class="docutils literal notranslate"><span class="pre">limit</span></code> is specified this operation is not parallelizable.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.fillna" title="apache_beam.dataframe.frames.DeferredDataFrame.fillna"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.fillna()</span></code></a></dt>
<dd>Fill NA values.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.where" title="apache_beam.dataframe.frames.DeferredDataFrame.where"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.where()</span></code></a></dt>
<dd>Replace values based on boolean condition.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.str.replace()</span></code></dt>
<dd>Simple string replacement.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<ul class="simple">
<li><dl class="first docutils">
<dt>Regex substitution is performed under the hood with <code class="docutils literal notranslate"><span class="pre">re.sub</span></code>. The</dt>
<dd>rules for substitution for <code class="docutils literal notranslate"><span class="pre">re.sub</span></code> are the same.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>Regular expressions will only substitute on strings, meaning you</dt>
<dd>cannot provide, for example, a regular expression matching floating
point numbers and expect the columns in your frame that have a
numeric dtype to be matched. However, if those floating point
numbers <em>are</em> strings, then you can do this.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>This method has <em>a lot</em> of options. You are encouraged to experiment</dt>
<dd>and play with this method to gain intuition about how it works.</dd>
</dl>
</li>
<li><dl class="first docutils">
<dt>When a dict is used as the <cite>to_replace</cite> value, the key(s) in</dt>
<dd>the dict act as the to_replace part and the value(s) in the dict act
as the value parameter.</dd>
</dl>
</li>
</ul>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">**Scalar `to_replace` and `value`**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
<span class="go">0 5</span>
<span class="go">1 1</span>
<span class="go">2 2</span>
<span class="go">3 3</span>
<span class="go">4 4</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">7</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">9</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 5 5 a</span>
<span class="go">1 1 6 b</span>
<span class="go">2 2 7 c</span>
<span class="go">3 3 8 d</span>
<span class="go">4 4 9 e</span>
<span class="go">**List-like `to_replace`**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="mi">4</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 4 5 a</span>
<span class="go">1 4 6 b</span>
<span class="go">2 4 7 c</span>
<span class="go">3 4 8 d</span>
<span class="go">4 4 9 e</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">([</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">])</span>
<span class="go"> A B C</span>
<span class="go">0 4 5 a</span>
<span class="go">1 3 6 b</span>
<span class="go">2 2 7 c</span>
<span class="go">3 1 8 d</span>
<span class="go">4 4 9 e</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">replace</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">method</span><span class="o">=</span><span class="s1">&#39;bfill&#39;</span><span class="p">)</span>
<span class="go">0 0</span>
<span class="go">1 3</span>
<span class="go">2 3</span>
<span class="go">3 3</span>
<span class="go">4 4</span>
<span class="go">dtype: int64</span>
<span class="go">**dict-like `to_replace`**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">({</span><span class="mi">0</span><span class="p">:</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">1</span><span class="p">:</span> <span class="mi">100</span><span class="p">})</span>
<span class="go"> A B C</span>
<span class="go">0 10 5 a</span>
<span class="go">1 100 6 b</span>
<span class="go">2 2 7 c</span>
<span class="go">3 3 8 d</span>
<span class="go">4 4 9 e</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="mi">0</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="mi">5</span><span class="p">},</span> <span class="mi">100</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">0 100 100 a</span>
<span class="go">1 1 6 b</span>
<span class="go">2 2 7 c</span>
<span class="go">3 3 8 d</span>
<span class="go">4 4 9 e</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">4</span><span class="p">:</span> <span class="mi">400</span><span class="p">}})</span>
<span class="go"> A B C</span>
<span class="go">0 100 5 a</span>
<span class="go">1 1 6 b</span>
<span class="go">2 2 7 c</span>
<span class="go">3 3 8 d</span>
<span class="go">4 400 9 e</span>
<span class="go">**Regular expression `to_replace`**</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;bat&#39;</span><span class="p">,</span> <span class="s1">&#39;foo&#39;</span><span class="p">,</span> <span class="s1">&#39;bait&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;abc&#39;</span><span class="p">,</span> <span class="s1">&#39;bar&#39;</span><span class="p">,</span> <span class="s1">&#39;xyz&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">to_replace</span><span class="o">=</span><span class="sa">r</span><span class="s1">&#39;^ba.$&#39;</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="s1">&#39;new&#39;</span><span class="p">,</span> <span class="n">regex</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 new abc</span>
<span class="go">1 foo new</span>
<span class="go">2 bait xyz</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="sa">r</span><span class="s1">&#39;^ba.$&#39;</span><span class="p">},</span> <span class="p">{</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="s1">&#39;new&#39;</span><span class="p">},</span> <span class="n">regex</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 new abc</span>
<span class="go">1 foo bar</span>
<span class="go">2 bait xyz</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">regex</span><span class="o">=</span><span class="sa">r</span><span class="s1">&#39;^ba.$&#39;</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="s1">&#39;new&#39;</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 new abc</span>
<span class="go">1 foo new</span>
<span class="go">2 bait xyz</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">regex</span><span class="o">=</span><span class="p">{</span><span class="sa">r</span><span class="s1">&#39;^ba.$&#39;</span><span class="p">:</span> <span class="s1">&#39;new&#39;</span><span class="p">,</span> <span class="s1">&#39;foo&#39;</span><span class="p">:</span> <span class="s1">&#39;xyz&#39;</span><span class="p">})</span>
<span class="go"> A B</span>
<span class="go">0 new abc</span>
<span class="go">1 xyz new</span>
<span class="go">2 bait xyz</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">regex</span><span class="o">=</span><span class="p">[</span><span class="sa">r</span><span class="s1">&#39;^ba.$&#39;</span><span class="p">,</span> <span class="s1">&#39;foo&#39;</span><span class="p">],</span> <span class="n">value</span><span class="o">=</span><span class="s1">&#39;new&#39;</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 new abc</span>
<span class="go">1 new new</span>
<span class="go">2 bait xyz</span>
<span class="go">Compare the behavior of ``s.replace({&#39;a&#39;: None})`` and</span>
<span class="go">``s.replace(&#39;a&#39;, None)`` to understand the peculiarities</span>
<span class="go">of the `to_replace` parameter:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">10</span><span class="p">,</span> <span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;a&#39;</span><span class="p">])</span>
<span class="go">When one uses a dict as the `to_replace` value, it is like the</span>
<span class="go">value(s) in the dict are equal to the `value` parameter.</span>
<span class="go">``s.replace({&#39;a&#39;: None})`` is equivalent to</span>
<span class="go">``s.replace(to_replace={&#39;a&#39;: None}, value=None, method=None)``:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">replace</span><span class="p">({</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="kc">None</span><span class="p">})</span>
<span class="go">0 10</span>
<span class="go">1 None</span>
<span class="go">2 None</span>
<span class="go">3 b</span>
<span class="go">4 None</span>
<span class="go">dtype: object</span>
<span class="go">When ``value=None`` and `to_replace` is a scalar, list or</span>
<span class="go">tuple, `replace` uses the method parameter (default &#39;pad&#39;) to do the</span>
<span class="go">replacement. So this is why the &#39;a&#39; values are being replaced by 10</span>
<span class="go">in rows 1 and 2 and &#39;b&#39; in row 4 in this case.</span>
<span class="go">The command ``s.replace(&#39;a&#39;, None)`` is actually equivalent to</span>
<span class="go">``s.replace(to_replace=&#39;a&#39;, value=None, method=&#39;pad&#39;)``:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="go">0 10</span>
<span class="go">1 10</span>
<span class="go">2 10</span>
<span class="go">3 b</span>
<span class="go">4 b</span>
<span class="go">dtype: object</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.resample">
<code class="descname">resample</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.resample" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.resample.html#pandas.DataFrame.resample" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.resample()</span></code></a> is not yet supported in the Beam DataFrame API because implementing it would require integrating with Beam event-time semantics.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-event-time-semantics">https://s.apache.org/dataframe-event-time-semantics</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.reset_index">
<code class="descname">reset_index</code><span class="sig-paren">(</span><em>level=None</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.reset_index" title="Permalink to this definition"></a></dt>
<dd><p>Reset the index, or a level of it.</p>
<p>Reset the index of the DataFrame, and use the default one instead.
If the DataFrame has a MultiIndex, this method can remove one or more
levels.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.10)"><em>tuple</em></a><em>, or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><em>default None</em>) – Only remove the given levels from the index. Removes all levels by
default.</li>
<li><strong>drop</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Do not try to insert index into dataframe columns. This resets
the index to the default integer index.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Modify the DeferredDataFrame in place (do not create a new object).</li>
<li><strong>col_level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default 0</em>) – If the columns have multiple levels, determines which level the
labels are inserted into. By default it is inserted into the first
level.</li>
<li><strong>col_fill</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><em>object</em></a><em>, </em><em>default ''</em>) – If the columns have multiple levels, determines how the other
levels are named. If None then the index name is repeated.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">DeferredDataFrame with the new index or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Dropping the entire index (e.g. with <code class="docutils literal notranslate"><span class="pre">reset_index(level=None)</span></code>) is
not parallelizable. It is also only guaranteed that the newly generated
index values will be unique. The Beam DataFrame API makes no guarantee
that the same index values as the equivalent pandas operation will be
generated, because that implementation is order-sensitive.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.set_index" title="apache_beam.dataframe.frames.DeferredDataFrame.set_index"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.set_index()</span></code></a></dt>
<dd>Opposite of reset_index.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.reindex" title="apache_beam.dataframe.frames.DeferredDataFrame.reindex"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.reindex()</span></code></a></dt>
<dd>Change to new indices or expand indices.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.reindex_like" title="apache_beam.dataframe.frames.DeferredDataFrame.reindex_like"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.reindex_like()</span></code></a></dt>
<dd>Change to same indices as other DeferredDataFrame.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([(</span><span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="mf">389.0</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="mf">24.0</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="mf">80.5</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;parrot&#39;</span><span class="p">,</span> <span class="s1">&#39;lion&#39;</span><span class="p">,</span> <span class="s1">&#39;monkey&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;class&#39;</span><span class="p">,</span> <span class="s1">&#39;max_speed&#39;</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> class max_speed</span>
<span class="go">falcon bird 389.0</span>
<span class="go">parrot bird 24.0</span>
<span class="go">lion mammal 80.5</span>
<span class="go">monkey mammal NaN</span>
<span class="go">When we reset the index, the old index is added as a column, and a</span>
<span class="go">new sequential index is used:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">reset_index</span><span class="p">()</span>
<span class="go"> index class max_speed</span>
<span class="go">0 falcon bird 389.0</span>
<span class="go">1 parrot bird 24.0</span>
<span class="go">2 lion mammal 80.5</span>
<span class="go">3 monkey mammal NaN</span>
<span class="go">We can use the `drop` parameter to avoid the old index being added as</span>
<span class="go">a column:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">drop</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="go"> class max_speed</span>
<span class="go">0 bird 389.0</span>
<span class="go">1 bird 24.0</span>
<span class="go">2 mammal 80.5</span>
<span class="go">3 mammal NaN</span>
<span class="go">You can also use `reset_index` with `MultiIndex`.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">index</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_tuples</span><span class="p">([(</span><span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="s1">&#39;parrot&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="s1">&#39;lion&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="s1">&#39;monkey&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">names</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;class&#39;</span><span class="p">,</span> <span class="s1">&#39;name&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">columns</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">MultiIndex</span><span class="o">.</span><span class="n">from_tuples</span><span class="p">([(</span><span class="s1">&#39;speed&#39;</span><span class="p">,</span> <span class="s1">&#39;max&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="s1">&#39;species&#39;</span><span class="p">,</span> <span class="s1">&#39;type&#39;</span><span class="p">)])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([(</span><span class="mf">389.0</span><span class="p">,</span> <span class="s1">&#39;fly&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span> <span class="mf">24.0</span><span class="p">,</span> <span class="s1">&#39;fly&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span> <span class="mf">80.5</span><span class="p">,</span> <span class="s1">&#39;run&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">nan</span><span class="p">,</span> <span class="s1">&#39;jump&#39;</span><span class="p">)],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="n">index</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="n">columns</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> speed species</span>
<span class="go"> max type</span>
<span class="go">class name</span>
<span class="go">bird falcon 389.0 fly</span>
<span class="go"> parrot 24.0 fly</span>
<span class="go">mammal lion 80.5 run</span>
<span class="go"> monkey NaN jump</span>
<span class="go">If the index has multiple levels, we can reset a subset of them:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="s1">&#39;class&#39;</span><span class="p">)</span>
<span class="go"> class speed species</span>
<span class="go"> max type</span>
<span class="go">name</span>
<span class="go">falcon bird 389.0 fly</span>
<span class="go">parrot bird 24.0 fly</span>
<span class="go">lion mammal 80.5 run</span>
<span class="go">monkey mammal NaN jump</span>
<span class="go">If we are not dropping the index, by default, it is placed in the top</span>
<span class="go">level. We can place it in another level:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="s1">&#39;class&#39;</span><span class="p">,</span> <span class="n">col_level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> speed species</span>
<span class="go"> class max type</span>
<span class="go">name</span>
<span class="go">falcon bird 389.0 fly</span>
<span class="go">parrot bird 24.0 fly</span>
<span class="go">lion mammal 80.5 run</span>
<span class="go">monkey mammal NaN jump</span>
<span class="go">When the index is inserted under another level, we can specify under</span>
<span class="go">which one with the parameter `col_fill`:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="s1">&#39;class&#39;</span><span class="p">,</span> <span class="n">col_level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">col_fill</span><span class="o">=</span><span class="s1">&#39;species&#39;</span><span class="p">)</span>
<span class="go"> species speed species</span>
<span class="go"> class max type</span>
<span class="go">name</span>
<span class="go">falcon bird 389.0 fly</span>
<span class="go">parrot bird 24.0 fly</span>
<span class="go">lion mammal 80.5 run</span>
<span class="go">monkey mammal NaN jump</span>
<span class="go">If we specify a nonexistent level for `col_fill`, it is created:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">reset_index</span><span class="p">(</span><span class="n">level</span><span class="o">=</span><span class="s1">&#39;class&#39;</span><span class="p">,</span> <span class="n">col_level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">col_fill</span><span class="o">=</span><span class="s1">&#39;genus&#39;</span><span class="p">)</span>
<span class="go"> genus speed species</span>
<span class="go"> class max type</span>
<span class="go">name</span>
<span class="go">falcon bird 389.0 fly</span>
<span class="go">parrot bird 24.0 fly</span>
<span class="go">lion mammal 80.5 run</span>
<span class="go">monkey mammal NaN jump</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.rfloordiv">
<code class="descname">rfloordiv</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.rfloordiv" title="Permalink to this definition"></a></dt>
<dd><p>Get integer division of dataframe and other, element-wise (binary operator <cite>rfloordiv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">//</span> <span class="pre">dataframe</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. This is the reversed-operand version of <cite>floordiv</cite>.</p>
<p>Among flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) to
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with the operator version, which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.rmod">
<code class="descname">rmod</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.rmod" title="Permalink to this definition"></a></dt>
<dd><p>Get Modulo of dataframe and other, element-wise (binary operator <cite>rmod</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">%</span> <span class="pre">dataframe</span></code>, but with support for substituting a fill_value
for missing data in one of the inputs. Its counterpart with the operands swapped is <cite>mod</cite>.</p>
<p>One of the flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>floordiv</cite>, <cite>mod</cite>, <cite>pow</cite>) for the
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with operator version which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.rmul">
<code class="descname">rmul</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.rmul" title="Permalink to this definition"></a></dt>
<dd><p>Get Multiplication of dataframe and other, element-wise (binary operator <cite>rmul</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">*</span> <span class="pre">dataframe</span></code>, but with support for substituting a fill_value
for missing data in one of the inputs. Its counterpart with the operands swapped is <cite>mul</cite>.</p>
<p>One of the flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>floordiv</cite>, <cite>mod</cite>, <cite>pow</cite>) for the
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with operator version which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.rolling">
<code class="descname">rolling</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.rolling" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.rolling.html#pandas.DataFrame.rolling" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.rolling()</span></code></a> is not yet supported in the Beam DataFrame API because implementing it would require integrating with Beam event-time semantics.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-event-time-semantics">https://s.apache.org/dataframe-event-time-semantics</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.rpow">
<code class="descname">rpow</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.rpow" title="Permalink to this definition"></a></dt>
<dd><p>Get Exponential power of dataframe and other, element-wise (binary operator <cite>rpow</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">**</span> <span class="pre">dataframe</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. With reverse version, <cite>pow</cite>.</p>
<p>Among flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) to
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with operator version which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.rsub">
<code class="descname">rsub</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.rsub" title="Permalink to this definition"></a></dt>
<dd><p>Get Subtraction of dataframe and other, element-wise (binary operator <cite>rsub</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">-</span> <span class="pre">dataframe</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. With reverse version, <cite>sub</cite>.</p>
<p>Among flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) to
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with operator version which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.rtruediv">
<code class="descname">rtruediv</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.rtruediv" title="Permalink to this definition"></a></dt>
<dd><p>Get Floating division of dataframe and other, element-wise (binary operator <cite>rtruediv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">other</span> <span class="pre">/</span> <span class="pre">dataframe</span></code>, but with support for substituting a fill_value
for missing data in one of the inputs. The reverse version is <cite>truediv</cite>.</p>
<p>This is one of the flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) around the
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with operator version, which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.set_flags">
<code class="descname">set_flags</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.set_flags" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.set_flags.html#pandas.DataFrame.set_flags" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.set_flags()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘set_flags’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.size">
<code class="descname">size</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.size" title="Permalink to this definition"></a></dt>
<dd><p>Return an int representing the number of elements in this object.</p>
<p>Return the number of rows if Series. Otherwise return the number of
rows times number of columns if DataFrame.</p>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-obj docutils literal notranslate"><span class="pre">ndarray.size</span></code></dt>
<dd>Number of elements in the array.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">({</span><span class="s1">&#39;a&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">:</span> <span class="mi">3</span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">size</span>
<span class="go">3</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">size</span>
<span class="go">4</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.slice_shift">
<code class="descname">slice_shift</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.slice_shift" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.slice_shift.html#pandas.DataFrame.slice_shift" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.slice_shift()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘slice_shift’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.sort_index">
<code class="descname">sort_index</code><span class="sig-paren">(</span><em>axis</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.sort_index" title="Permalink to this definition"></a></dt>
<dd><p>Sort object by labels (along an axis).</p>
<p>Returns a new DataFrame sorted by label if <cite>inplace</cite> argument is
<code class="docutils literal notranslate"><span class="pre">False</span></code>, otherwise updates the original DataFrame and returns None.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – The axis along which to sort. The value 0 identifies the rows,
and 1 identifies the columns.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>level name</em><em> or </em><em>list of ints</em><em> or </em><em>list of level names</em>) – If not None, sort on values in specified index level(s).</li>
<li><strong>ascending</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em> or </em><em>list-like of bools</em><em>, </em><em>default True</em>) – Sort ascending vs. descending. When the index is a MultiIndex the
sort direction can be controlled for each level individually.</li>
<li><strong>inplace</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If True, perform operation in-place.</li>
<li><strong>kind</strong> (<em>{'quicksort'</em><em>, </em><em>'mergesort'</em><em>, </em><em>'heapsort'</em><em>, </em><em>'stable'}</em><em>, </em><em>default 'quicksort'</em>) – Choice of sorting algorithm. See also <a class="reference external" href="https://numpy.org/doc/stable/reference/generated/numpy.sort.html#numpy.sort" title="(in NumPy v1.22)"><code class="xref py py-func docutils literal notranslate"><span class="pre">numpy.sort()</span></code></a> for more
information. <cite>mergesort</cite> and <cite>stable</cite> are the only stable algorithms. For
DeferredDataFrames, this option is only applied when sorting on a single
column or label.</li>
<li><strong>na_position</strong> (<em>{'first'</em><em>, </em><em>'last'}</em><em>, </em><em>default 'last'</em>) – Puts NaNs at the beginning if <cite>first</cite>; <cite>last</cite> puts NaNs at the end.
Not implemented for MultiIndex.</li>
<li><strong>sort_remaining</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – If True and sorting by level and index is multilevel, sort by other
levels too (in order) after sorting by specified level.</li>
<li><strong>ignore_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – <p>If True, the resulting axis will be labeled 0, 1, …, n - 1.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.0.0.</span></p>
</div>
</li>
<li><strong>key</strong> (<em>callable</em><em>, </em><em>optional</em>) – <p>If not None, apply the key function to the index values
before sorting. This is similar to the <cite>key</cite> argument in the
builtin <code class="xref py py-meth docutils literal notranslate"><span class="pre">sorted()</span></code> function, with the notable difference that
this <cite>key</cite> function should be <em>vectorized</em>. It should expect an
<code class="docutils literal notranslate"><span class="pre">Index</span></code> and return an <code class="docutils literal notranslate"><span class="pre">Index</span></code> of the same shape. For MultiIndex
inputs, the key is applied <em>per level</em>.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The original DeferredDataFrame sorted by the labels or None if <code class="docutils literal notranslate"><span class="pre">inplace=True</span></code>.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">axis=index</span></code> is not allowed because it imposes an ordering on the
dataset, and we cannot guarantee it will be maintained (see
<a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>). Only
<code class="docutils literal notranslate"><span class="pre">axis=columns</span></code> is allowed.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sort_index" title="apache_beam.dataframe.frames.DeferredSeries.sort_index"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sort_index()</span></code></a></dt>
<dd>Sort DeferredSeries by the index.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sort_values" title="apache_beam.dataframe.frames.DeferredDataFrame.sort_values"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sort_values()</span></code></a></dt>
<dd>Sort DeferredDataFrame by the value.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries.sort_values" title="apache_beam.dataframe.frames.DeferredSeries.sort_values"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredSeries.sort_values()</span></code></a></dt>
<dd>Sort DeferredSeries by the value.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">100</span><span class="p">,</span> <span class="mi">29</span><span class="p">,</span> <span class="mi">234</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">150</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sort_index</span><span class="p">()</span>
<span class="go"> A</span>
<span class="go">1 4</span>
<span class="go">29 2</span>
<span class="go">100 1</span>
<span class="go">150 5</span>
<span class="go">234 3</span>
<span class="go">By default, it sorts in ascending order; to sort in descending order,</span>
<span class="go">use ``ascending=False``</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sort_index</span><span class="p">(</span><span class="n">ascending</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go"> A</span>
<span class="go">234 3</span>
<span class="go">150 5</span>
<span class="go">100 1</span>
<span class="go">29 2</span>
<span class="go">1 4</span>
<span class="go">A key function can be specified which is applied to the index before</span>
<span class="go">sorting. For a ``MultiIndex`` this is applied to each level separately.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s2">&quot;a&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;C&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sort_index</span><span class="p">(</span><span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="o">.</span><span class="n">str</span><span class="o">.</span><span class="n">lower</span><span class="p">())</span>
<span class="go"> a</span>
<span class="go">A 1</span>
<span class="go">b 2</span>
<span class="go">C 3</span>
<span class="go">d 4</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.sort_values">
<code class="descname">sort_values</code><span class="sig-paren">(</span><em>axis</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.sort_values" title="Permalink to this definition"></a></dt>
<dd><p><code class="docutils literal notranslate"><span class="pre">sort_values</span></code> is not implemented.</p>
<p>It is not implemented for <code class="docutils literal notranslate"><span class="pre">axis=index</span></code> because it imposes an ordering on
the dataset, and it likely will not be maintained (see
<a class="reference external" href="https://s.apache.org/dataframe-order-sensitive-operations">https://s.apache.org/dataframe-order-sensitive-operations</a>).</p>
<p>It is not implemented for <code class="docutils literal notranslate"><span class="pre">axis=columns</span></code> because it makes the order of
the columns depend on the data (see
<a class="reference external" href="https://s.apache.org/dataframe-non-deferred-columns">https://s.apache.org/dataframe-non-deferred-columns</a>).</p>
</dd></dl>
<dl class="attribute">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.sparse">
<code class="descname">sparse</code><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.sparse" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.sparse.html#pandas.DataFrame.sparse" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.sparse()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘sparse’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-12425">BEAM-12425</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.squeeze">
<code class="descname">squeeze</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.squeeze" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.squeeze.html#pandas.DataFrame.squeeze" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.squeeze()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘squeeze’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.sub">
<code class="descname">sub</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="Permalink to this definition"></a></dt>
<dd><p>Get subtraction of dataframe and other, element-wise (binary operator <cite>sub</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">dataframe</span> <span class="pre">-</span> <span class="pre">other</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. With reverse version, <cite>rsub</cite>.</p>
<p>Among flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) to
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with operator version which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.subtract">
<code class="descname">subtract</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.subtract" title="Permalink to this definition"></a></dt>
<dd><p>Get subtraction of dataframe and other, element-wise (binary operator <cite>sub</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">dataframe</span> <span class="pre">-</span> <span class="pre">other</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. With reverse version, <cite>rsub</cite>.</p>
<p>Among flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) to
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only <code class="docutils literal notranslate"><span class="pre">level=None</span></code> is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
<span class="go">Add a scalar with operator version which returns the same</span>
<span class="go">results.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="go">Divide by constant with reverse version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
<span class="go">Subtract a list and Series by axis with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
<span class="go">Multiply a DataFrame of different shape with operator version.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
<span class="go">Divide by a MultiIndex by level.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.swapaxes">
<code class="descname">swapaxes</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.swapaxes" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.Series.swapaxes.html#pandas.Series.swapaxes" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.Series.swapaxes()</span></code></a> is not yet supported in the Beam DataFrame API because the columns in the output DataFrame depend on the data.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-columns">https://s.apache.org/dataframe-non-deferred-columns</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.swaplevel">
<code class="descname">swaplevel</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.swaplevel" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.swaplevel.html#pandas.DataFrame.swaplevel" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.swaplevel()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘swaplevel’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_clipboard">
<code class="descname">to_clipboard</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_clipboard" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_clipboard.html#pandas.DataFrame.to_clipboard" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_clipboard()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_clipboard’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_csv">
<code class="descname">to_csv</code><span class="sig-paren">(</span><em>path</em>, <em>transform_label=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_csv" title="Permalink to this definition"></a></dt>
<dd><p>Write object to a comma-separated values (csv) file.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>path_or_buf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>file handle</em><em>, </em><em>default None</em>) – <p>File path or object, if None is provided the result is returned as
a string. If a non-binary file object is passed, it should be opened
with <cite>newline=''</cite>, disabling universal newlines. If a binary
file object is passed, <cite>mode</cite> might need to contain a <cite>‘b’</cite>.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.2.0: </span>Support for binary file objects was introduced.</p>
</div>
</li>
<li><strong>sep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default '</em><em>,</em><em>'</em>) – String of length 1. Field delimiter for the output file.</li>
<li><strong>na_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default ''</em>) – Missing data representation.</li>
<li><strong>float_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default None</em>) – Format string for floating point numbers.</li>
<li><strong>columns</strong> (<em>sequence</em><em>, </em><em>optional</em>) – Columns to write.</li>
<li><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em> or </em><em>list of str</em><em>, </em><em>default True</em>) – Write out the column names. If a list of strings is given it is
assumed to be aliases for the column names.</li>
<li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Write row names (index).</li>
<li><strong>index_label</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>sequence</em><em>, or </em><em>False</em><em>, </em><em>default None</em>) – Column label for index column(s) if desired. If None is given, and
<cite>header</cite> and <cite>index</cite> are True, then the index names are used. A
sequence should be given if the object uses MultiIndex. If
False do not print fields for index names. Use index_label=False
for easier importing in R.</li>
<li><strong>mode</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – Python write mode, default ‘w’.</li>
<li><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – A string representing the encoding to use in the output file,
defaults to ‘utf-8’. <cite>encoding</cite> is not supported if <cite>path_or_buf</cite>
is a non-binary file object.</li>
<li><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>If str, represents compression mode. If dict, value at ‘method’ is
the compression mode. Compression mode may be any of the following
possible values: {‘infer’, ‘gzip’, ‘bz2’, ‘zip’, ‘xz’, None}. If
compression mode is ‘infer’ and <cite>path_or_buf</cite> is path-like, then
detect compression mode from the following extensions: ‘.gz’,
‘.bz2’, ‘.zip’ or ‘.xz’. (otherwise no compression). If dict given
and mode is one of {‘zip’, ‘gzip’, ‘bz2’}, or inferred as
one of the above, other entries passed as
additional compression options.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.0.0: </span>May now be a dict with key ‘method’ as compression mode
and other entries as additional compression options if
compression mode is ‘zip’.</p>
</div>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.1.0: </span>Passing compression options as keys in dict is
supported for compression modes ‘gzip’ and ‘bz2’
as well as ‘zip’.</p>
</div>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.2.0: </span>Compression is supported for binary file objects.</p>
</div>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.2.0: </span>Previous versions forwarded dict entries for ‘gzip’ to
<cite>gzip.open</cite> instead of <cite>gzip.GzipFile</cite> which prevented
setting <cite>mtime</cite>.</p>
</div>
</li>
<li><strong>quoting</strong> (<em>optional constant from csv module</em>) – Defaults to csv.QUOTE_MINIMAL. If you have set a <cite>float_format</cite>
then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
will treat them as non-numeric.</li>
<li><strong>quotechar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default '&quot;'</em>) – String of length 1. Character used to quote fields.</li>
<li><strong>line_terminator</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – The newline character or character sequence to use in the output
file. Defaults to <cite>os.linesep</cite>, which depends on the OS in which
this method is called (e.g. ‘\n’ for Linux, ‘\r\n’ for Windows).</li>
<li><strong>chunksize</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a>) – Rows to write at a time.</li>
<li><strong>date_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default None</em>) – Format string for datetime objects.</li>
<li><strong>doublequote</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Control quoting of <cite>quotechar</cite> inside a field.</li>
<li><strong>escapechar</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default None</em>) – String of length 1. Character used to escape <cite>sep</cite> and <cite>quotechar</cite>
when appropriate.</li>
<li><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default '.'</em>) – Character recognized as decimal separator. E.g. use ‘,’ for
European data.</li>
<li><strong>errors</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default 'strict'</em>) – <p>Specifies how encoding and decoding errors are to be handled.
See the errors argument for <a class="reference external" href="https://docs.python.org/3/library/functions.html#open" title="(in Python v3.10)"><code class="xref py py-func docutils literal notranslate"><span class="pre">open()</span></code></a> for a full list
of options.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
<li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib</span></code> as header options. For other URLs (e.g.
starting with “s3://”, and “gcs://”) the key-value pairs are forwarded to
<code class="docutils literal notranslate"><span class="pre">fsspec</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more details.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.2.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If path_or_buf is None, returns the resulting csv format as a
string. Otherwise returns None.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a> or <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)">str</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">read_csv()</span></code></dt>
<dd>Load a CSV file into a DeferredDataFrame.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_excel" title="apache_beam.dataframe.frames.DeferredDataFrame.to_excel"><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_excel()</span></code></a></dt>
<dd>Write DeferredDataFrame to an Excel file.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;name&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;Raphael&#39;</span><span class="p">,</span> <span class="s1">&#39;Donatello&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;mask&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;red&#39;</span><span class="p">,</span> <span class="s1">&#39;purple&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;weapon&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;sai&#39;</span><span class="p">,</span> <span class="s1">&#39;bo staff&#39;</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
<span class="go">&#39;name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n&#39;</span>
<span class="go">Create &#39;out.zip&#39; containing &#39;out.csv&#39;</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">compression_opts</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">(</span><span class="n">method</span><span class="o">=</span><span class="s1">&#39;zip&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">archive_name</span><span class="o">=</span><span class="s1">&#39;out.csv&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="s1">&#39;out.zip&#39;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">compression</span><span class="o">=</span><span class="n">compression_opts</span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_excel">
<code class="descname">to_excel</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_excel" title="Permalink to this definition"></a></dt>
<dd><p>Write object to an Excel sheet.</p>
<p>To write a single object to an Excel .xlsx file it is only necessary to
specify a target file name. To write to multiple sheets it is necessary to
create an <cite>ExcelWriter</cite> object with a target file name, and specify a sheet
in the file to write to.</p>
<p>Multiple sheets may be written to by specifying unique <cite>sheet_name</cite>.
With all data written to the file it is necessary to save the changes.
Note that creating an <cite>ExcelWriter</cite> object with a file name that already
exists will result in the contents of the existing file being erased.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>excel_writer</strong> (<em>path-like</em><em>, </em><em>file-like</em><em>, or </em><em>ExcelWriter object</em>) – File path or existing ExcelWriter.</li>
<li><strong>sheet_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default 'Sheet1'</em>) – Name of sheet which will contain DeferredDataFrame.</li>
<li><strong>na_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default ''</em>) – Missing data representation.</li>
<li><strong>float_format</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – Format string for floating point numbers. For example
<code class="docutils literal notranslate"><span class="pre">float_format=&quot;%.2f&quot;</span></code> will format 0.1234 to 0.12.</li>
<li><strong>columns</strong> (<em>sequence</em><em> or </em><em>list of str</em><em>, </em><em>optional</em>) – Columns to write.</li>
<li><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em> or </em><em>list of str</em><em>, </em><em>default True</em>) – Write out the column names. If a list of strings is given it is
assumed to be aliases for the column names.</li>
<li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Write row names (index).</li>
<li><strong>index_label</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>sequence</em><em>, </em><em>optional</em>) – Column label for index column(s) if desired. If not specified, and
<cite>header</cite> and <cite>index</cite> are True, then the index names are used. A
sequence should be given if the DeferredDataFrame uses MultiIndex.</li>
<li><strong>startrow</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 0</em>) – Upper left cell row to dump data frame.</li>
<li><strong>startcol</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 0</em>) – Upper left cell column to dump data frame.</li>
<li><strong>engine</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – <p>Write engine to use, ‘openpyxl’ or ‘xlsxwriter’. You can also set this
via the options <code class="docutils literal notranslate"><span class="pre">io.excel.xlsx.writer</span></code>, <code class="docutils literal notranslate"><span class="pre">io.excel.xls.writer</span></code>, and
<code class="docutils literal notranslate"><span class="pre">io.excel.xlsm.writer</span></code>.</p>
<div class="deprecated">
<p><span class="versionmodified">Deprecated since version 1.2.0: </span>As the <a class="reference external" href="https://pypi.org/project/xlwt/">xlwt</a> package is no longer
maintained, the <code class="docutils literal notranslate"><span class="pre">xlwt</span></code> engine will be removed in a future version
of pandas.</p>
</div>
</li>
<li><strong>merge_cells</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Write MultiIndex and Hierarchical Rows as merged cells.</li>
<li><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – Encoding of the resulting excel file. Only necessary for xlwt,
other writers support unicode natively.</li>
<li><strong>inf_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default 'inf'</em>) – Representation for infinity (there is no native representation for
infinity in Excel).</li>
<li><strong>verbose</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Display more information in the error logs.</li>
<li><strong>freeze_panes</strong> (<em>tuple of int</em><em> (</em><em>length 2</em><em>)</em><em>, </em><em>optional</em>) – Specifies the one-based bottommost row and rightmost column that
is to be frozen.</li>
<li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib</span></code> as header options. For other URLs (e.g.
starting with “s3://”, and “gcs://”) the key-value pairs are forwarded to
<code class="docutils literal notranslate"><span class="pre">fsspec</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more details.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.2.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_csv" title="apache_beam.dataframe.frames.DeferredDataFrame.to_csv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_csv()</span></code></a></dt>
<dd>Write DeferredDataFrame to a comma-separated values (csv) file.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">ExcelWriter()</span></code></dt>
<dd>Class for writing DeferredDataFrame objects into excel sheets.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">read_excel()</span></code></dt>
<dd>Read an Excel file into a DeferredDataFrame.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">read_csv()</span></code></dt>
<dd>Read a comma-separated values (csv) file into DeferredDataFrame.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>For compatibility with <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_csv" title="apache_beam.dataframe.frames.DeferredDataFrame.to_csv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_csv()</span></code></a>,
to_excel serializes lists and dicts to strings before writing.</p>
<p>Once a workbook has been saved it is not possible to write further
data without rewriting the whole workbook.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Create, write to and save a workbook:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">([[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">],</span> <span class="p">[</span><span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;row 1&#39;</span><span class="p">,</span> <span class="s1">&#39;row 2&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;col 1&#39;</span><span class="p">,</span> <span class="s1">&#39;col 2&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="s2">&quot;output.xlsx&quot;</span><span class="p">)</span>
<span class="go">To specify the sheet name:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="s2">&quot;output.xlsx&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_1&#39;</span><span class="p">)</span>
<span class="go">If you wish to write to more than one sheet in the workbook, it is</span>
<span class="go">necessary to specify an ExcelWriter object:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df2</span> <span class="o">=</span> <span class="n">df1</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="k">with</span> <span class="n">pd</span><span class="o">.</span><span class="n">ExcelWriter</span><span class="p">(</span><span class="s1">&#39;output.xlsx&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">writer</span><span class="p">:</span>
<span class="gp">... </span> <span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="n">writer</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_1&#39;</span><span class="p">)</span>
<span class="gp">... </span> <span class="n">df2</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="n">writer</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_2&#39;</span><span class="p">)</span>
<span class="go">ExcelWriter can also be used to append to an existing Excel file:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="k">with</span> <span class="n">pd</span><span class="o">.</span><span class="n">ExcelWriter</span><span class="p">(</span><span class="s1">&#39;output.xlsx&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;a&#39;</span><span class="p">)</span> <span class="k">as</span> <span class="n">writer</span><span class="p">:</span>
<span class="gp">... </span> <span class="n">df</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="n">writer</span><span class="p">,</span> <span class="n">sheet_name</span><span class="o">=</span><span class="s1">&#39;Sheet_name_3&#39;</span><span class="p">)</span>
<span class="go">To set the library that is used to write the Excel file,</span>
<span class="go">you can pass the `engine` keyword (the default engine is</span>
<span class="go">automatically chosen depending on the file extension):</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df1</span><span class="o">.</span><span class="n">to_excel</span><span class="p">(</span><span class="s1">&#39;output1.xlsx&#39;</span><span class="p">,</span> <span class="n">engine</span><span class="o">=</span><span class="s1">&#39;xlsxwriter&#39;</span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_feather">
<code class="descname">to_feather</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_feather" title="Permalink to this definition"></a></dt>
<dd><p>Write a DataFrame to the binary Feather format.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first last simple">
<li><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>file-like object</em>) – If a string, it will be used as Root Directory path.</li>
<li><strong>**kwargs</strong><p>Additional keywords passed to <code class="xref py py-func docutils literal notranslate"><span class="pre">pyarrow.feather.write_feather()</span></code>.
Starting with pyarrow 0.17, this includes the <cite>compression</cite>,
<cite>compression_level</cite>, <cite>chunksize</cite> and <cite>version</cite> keywords.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_gbq">
<code class="descname">to_gbq</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_gbq" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_gbq.html#pandas.DataFrame.to_gbq" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_gbq()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_gbq’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_hdf">
<code class="descname">to_hdf</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_hdf" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_hdf.html#pandas.DataFrame.to_hdf" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_hdf()</span></code></a> is not yet supported in the Beam DataFrame API because HDF5 is a random access file format.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_html">
<code class="descname">to_html</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_html" title="Permalink to this definition"></a></dt>
<dd><p>Render a DataFrame as an HTML table.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>buf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>Path</em><em> or </em><em>StringIO-like</em><em>, </em><em>optional</em><em>, </em><em>default None</em>) – Buffer to write to. If None, the output is returned as a string.</li>
<li><strong>columns</strong> (<em>sequence</em><em>, </em><em>optional</em><em>, </em><em>default None</em>) – The subset of columns to write. Writes all columns by default.</li>
<li><strong>col_space</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em> or </em><em>dict of int</em><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – <p>The minimum width of each column in CSS length units. An int is assumed to be px units.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 0.25.0: </span>Ability to use str.</p>
</div>
</li>
<li><strong>header</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>optional</em>) – Whether to print column labels, default True.</li>
<li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>optional</em><em>, </em><em>default True</em>) – Whether to print index (row) labels.</li>
<li><strong>na_rep</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em><em>, </em><em>default 'NaN'</em>) – String representation of <code class="docutils literal notranslate"><span class="pre">NaN</span></code> to use.</li>
<li><strong>formatters</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.10)"><em>tuple</em></a><em> or </em><em>dict of one-param. functions</em><em>, </em><em>optional</em>) – Formatter functions to apply to columns’ elements by position or
name.
The result of each function must be a unicode string.
List/tuple must be of length equal to the number of columns.</li>
<li><strong>float_format</strong> (<em>one-parameter function</em><em>, </em><em>optional</em><em>, </em><em>default None</em>) – <p>Formatter function to apply to columns’ elements if they are
floats. This function must return a unicode string and will be
applied only to the non-<code class="docutils literal notranslate"><span class="pre">NaN</span></code> elements, with <code class="docutils literal notranslate"><span class="pre">NaN</span></code> being
handled by <code class="docutils literal notranslate"><span class="pre">na_rep</span></code>.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.2.0.</span></p>
</div>
</li>
<li><strong>sparsify</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>optional</em><em>, </em><em>default True</em>) – Set to False for a DeferredDataFrame with a hierarchical index to print
every multiindex key at each row.</li>
<li><strong>index_names</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>optional</em><em>, </em><em>default True</em>) – Prints the names of the indexes.</li>
<li><strong>justify</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default None</em>) – <p>How to justify the column labels. If None uses the option from
the print configuration (controlled by set_option), ‘right’ out
of the box. Valid values are</p>
<ul>
<li>left</li>
<li>right</li>
<li>center</li>
<li>justify</li>
<li>justify-all</li>
<li>start</li>
<li>end</li>
<li>inherit</li>
<li>match-parent</li>
<li>initial</li>
<li>unset.</li>
</ul>
</li>
<li><strong>max_rows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – Maximum number of rows to display in the console.</li>
<li><strong>min_rows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – The number of rows to display in the console in a truncated repr
(when number of rows is above <cite>max_rows</cite>).</li>
<li><strong>max_cols</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – Maximum number of columns to display in the console.</li>
<li><strong>show_dimensions</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Display DeferredDataFrame dimensions (number of rows by number of columns).</li>
<li><strong>decimal</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default '.'</em>) – Character recognized as decimal separator, e.g. ‘,’ in Europe.</li>
<li><strong>bold_rows</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Make the row labels bold in the output.</li>
<li><strong>classes</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#tuple" title="(in Python v3.10)"><em>tuple</em></a><em>, </em><em>default None</em>) – CSS class(es) to apply to the resulting html table.</li>
<li><strong>escape</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Convert the characters &lt;, &gt;, and &amp; to HTML-safe sequences.</li>
<li><strong>notebook</strong> (<em>{True</em><em>, </em><em>False}</em><em>, </em><em>default False</em>) – Whether the generated HTML is for IPython Notebook.</li>
<li><strong>border</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – A <code class="docutils literal notranslate"><span class="pre">border=border</span></code> attribute is included in the opening
<cite>&lt;table&gt;</cite> tag. Default <code class="docutils literal notranslate"><span class="pre">pd.options.display.html.border</span></code>.</li>
<li><strong>encoding</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default &quot;utf-8&quot;</em>) – <p>Set character encoding.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.0.</span></p>
</div>
</li>
<li><strong>table_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – A css id is included in the opening <cite>&lt;table&gt;</cite> tag if specified.</li>
<li><strong>render_links</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – Convert URLs to HTML links.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If buf is None, returns the result as a string. Otherwise returns
None.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)">str</a> or <a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_string" title="apache_beam.dataframe.frames.DeferredDataFrame.to_string"><code class="xref py py-meth docutils literal notranslate"><span class="pre">to_string()</span></code></a></dt>
<dd>Convert DeferredDataFrame to a string.</dd>
</dl>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_json">
<code class="descname">to_json</code><span class="sig-paren">(</span><em>path</em>, <em>orient=None</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_json" title="Permalink to this definition"></a></dt>
<dd><p>Convert the object to a JSON string.</p>
<p>Note that NaNs and None will be converted to null, and datetime objects
will be converted to UNIX timestamps.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>path_or_buf</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>file handle</em><em>, </em><em>optional</em>) – File path or object. If not specified, the result is returned as
a string.</li>
<li><strong>orient</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – <p>Indication of expected JSON string format.</p>
<ul>
<li>DeferredSeries:<blockquote>
<div><ul>
<li>default is ‘index’</li>
<li>allowed values are: {‘split’, ‘records’, ‘index’, ‘table’}.</li>
</ul>
</div></blockquote>
</li>
<li>DeferredDataFrame:<blockquote>
<div><ul>
<li>default is ‘columns’</li>
<li>allowed values are: {‘split’, ‘records’, ‘index’, ‘columns’,
‘values’, ‘table’}.</li>
</ul>
</div></blockquote>
</li>
<li>The format of the JSON string:<blockquote>
<div><ul>
<li>‘split’ : dict like {‘index’ -&gt; [index], ‘columns’ -&gt; [columns],
‘data’ -&gt; [values]}</li>
<li>‘records’ : list like [{column -&gt; value}, … , {column -&gt; value}]</li>
<li>‘index’ : dict like {index -&gt; {column -&gt; value}}</li>
<li>‘columns’ : dict like {column -&gt; {index -&gt; value}}</li>
<li>‘values’ : just the values array</li>
<li>‘table’ : dict like {‘schema’: {schema}, ‘data’: {data}}</li>
</ul>
<p>Describing the data, where data component is like <code class="docutils literal notranslate"><span class="pre">orient='records'</span></code>.</p>
</div></blockquote>
</li>
</ul>
</li>
<li><strong>date_format</strong> (<em>{None</em><em>, </em><em>'epoch'</em><em>, </em><em>'iso'}</em>) – Type of date conversion. ‘epoch’ = epoch milliseconds,
‘iso’ = ISO8601. The default depends on the <cite>orient</cite>. For
<code class="docutils literal notranslate"><span class="pre">orient='table'</span></code>, the default is ‘iso’. For all other orients,
the default is ‘epoch’.</li>
<li><strong>double_precision</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>default 10</em>) – The number of decimal places to use when encoding
floating point values.</li>
<li><strong>force_ascii</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Force encoded string to be ASCII.</li>
<li><strong>date_unit</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default 'ms'</em><em> (</em><em>milliseconds</em><em>)</em>) – The time unit to encode to, governs timestamp and ISO8601
precision. One of ‘s’, ‘ms’, ‘us’, ‘ns’ for second, millisecond,
microsecond, and nanosecond respectively.</li>
<li><strong>default_handler</strong> (<em>callable</em><em>, </em><em>default None</em>) – Handler to call if object cannot otherwise be converted to a
suitable format for JSON. Should receive a single argument which is
the object to convert and return a serialisable object.</li>
<li><strong>lines</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default False</em>) – If ‘orient’ is ‘records’ write out line-delimited json format. Will
throw ValueError if incorrect ‘orient’ since others are not
list-like.</li>
<li><strong>compression</strong> (<em>{'infer'</em><em>, </em><em>'gzip'</em><em>, </em><em>'bz2'</em><em>, </em><em>'zip'</em><em>, </em><em>'xz'</em><em>, </em><em>None}</em>) – A string representing the compression to use in the output file,
only used when the first argument is a filename. By default, the
compression is inferred from the filename.</li>
<li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Whether to include the index values in the JSON string. Not
including the index (<code class="docutils literal notranslate"><span class="pre">index=False</span></code>) is only supported when
orient is ‘split’ or ‘table’.</li>
<li><strong>indent</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><em>optional</em>) – <p>Length of whitespace used to indent each record.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.0.0.</span></p>
</div>
</li>
<li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib</span></code> as header options. For other URLs (e.g.
starting with “s3://”, and “gcs://”) the key-value pairs are forwarded to
<code class="docutils literal notranslate"><span class="pre">fsspec</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more details.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.2.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">If path_or_buf is None, returns the resulting json format as a
string. Otherwise returns None.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)">None</a> or <a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)">str</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">read_json()</span></code></dt>
<dd>Convert a JSON string to pandas object.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>The behavior of <code class="docutils literal notranslate"><span class="pre">indent=0</span></code> varies from the stdlib, which does not
indent the output but does insert newlines. Currently, <code class="docutils literal notranslate"><span class="pre">indent=0</span></code>
and the default <code class="docutils literal notranslate"><span class="pre">indent=None</span></code> are equivalent in pandas, though this
may change in a future release.</p>
<p><code class="docutils literal notranslate"><span class="pre">orient='table'</span></code> contains a ‘pandas_version’ field under ‘schema’.
This stores the version of <cite>pandas</cite> used in the latest revision of the
schema.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span> <span class="nn">json</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span>
<span class="gp">... </span> <span class="p">[[</span><span class="s2">&quot;a&quot;</span><span class="p">,</span> <span class="s2">&quot;b&quot;</span><span class="p">],</span> <span class="p">[</span><span class="s2">&quot;c&quot;</span><span class="p">,</span> <span class="s2">&quot;d&quot;</span><span class="p">]],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;row 1&quot;</span><span class="p">,</span> <span class="s2">&quot;row 2&quot;</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">columns</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;col 1&quot;</span><span class="p">,</span> <span class="s2">&quot;col 2&quot;</span><span class="p">],</span>
<span class="gp">... </span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;split&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">{</span>
<span class="go"> &quot;columns&quot;: [</span>
<span class="go"> &quot;col 1&quot;,</span>
<span class="go"> &quot;col 2&quot;</span>
<span class="go"> ],</span>
<span class="go"> &quot;index&quot;: [</span>
<span class="go"> &quot;row 1&quot;,</span>
<span class="go"> &quot;row 2&quot;</span>
<span class="go"> ],</span>
<span class="go"> &quot;data&quot;: [</span>
<span class="go"> [</span>
<span class="go"> &quot;a&quot;,</span>
<span class="go"> &quot;b&quot;</span>
<span class="go"> ],</span>
<span class="go"> [</span>
<span class="go"> &quot;c&quot;,</span>
<span class="go"> &quot;d&quot;</span>
<span class="go"> ]</span>
<span class="go"> ]</span>
<span class="go">}</span>
<span class="go">Encoding/decoding a DataFrame using ``&#39;records&#39;`` formatted JSON.</span>
<span class="go">Note that index labels are not preserved with this encoding.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;records&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">[</span>
<span class="go"> {</span>
<span class="go"> &quot;col 1&quot;: &quot;a&quot;,</span>
<span class="go"> &quot;col 2&quot;: &quot;b&quot;</span>
<span class="go"> },</span>
<span class="go"> {</span>
<span class="go"> &quot;col 1&quot;: &quot;c&quot;,</span>
<span class="go"> &quot;col 2&quot;: &quot;d&quot;</span>
<span class="go"> }</span>
<span class="go">]</span>
<span class="go">Encoding/decoding a DataFrame using ``&#39;index&#39;`` formatted JSON:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;index&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">{</span>
<span class="go"> &quot;row 1&quot;: {</span>
<span class="go"> &quot;col 1&quot;: &quot;a&quot;,</span>
<span class="go"> &quot;col 2&quot;: &quot;b&quot;</span>
<span class="go"> },</span>
<span class="go"> &quot;row 2&quot;: {</span>
<span class="go"> &quot;col 1&quot;: &quot;c&quot;,</span>
<span class="go"> &quot;col 2&quot;: &quot;d&quot;</span>
<span class="go"> }</span>
<span class="go">}</span>
<span class="go">Encoding/decoding a DataFrame using ``&#39;columns&#39;`` formatted JSON:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;columns&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">{</span>
<span class="go"> &quot;col 1&quot;: {</span>
<span class="go"> &quot;row 1&quot;: &quot;a&quot;,</span>
<span class="go"> &quot;row 2&quot;: &quot;c&quot;</span>
<span class="go"> },</span>
<span class="go"> &quot;col 2&quot;: {</span>
<span class="go"> &quot;row 1&quot;: &quot;b&quot;,</span>
<span class="go"> &quot;row 2&quot;: &quot;d&quot;</span>
<span class="go"> }</span>
<span class="go">}</span>
<span class="go">Encoding/decoding a DataFrame using ``&#39;values&#39;`` formatted JSON:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;values&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">[</span>
<span class="go"> [</span>
<span class="go"> &quot;a&quot;,</span>
<span class="go"> &quot;b&quot;</span>
<span class="go"> ],</span>
<span class="go"> [</span>
<span class="go"> &quot;c&quot;,</span>
<span class="go"> &quot;d&quot;</span>
<span class="go"> ]</span>
<span class="go">]</span>
<span class="go">Encoding with Table Schema:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">result</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">to_json</span><span class="p">(</span><span class="n">orient</span><span class="o">=</span><span class="s2">&quot;table&quot;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">parsed</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">result</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">parsed</span><span class="p">,</span> <span class="n">indent</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">{</span>
<span class="go"> &quot;schema&quot;: {</span>
<span class="go"> &quot;fields&quot;: [</span>
<span class="go"> {</span>
<span class="go"> &quot;name&quot;: &quot;index&quot;,</span>
<span class="go"> &quot;type&quot;: &quot;string&quot;</span>
<span class="go"> },</span>
<span class="go"> {</span>
<span class="go"> &quot;name&quot;: &quot;col 1&quot;,</span>
<span class="go"> &quot;type&quot;: &quot;string&quot;</span>
<span class="go"> },</span>
<span class="go"> {</span>
<span class="go"> &quot;name&quot;: &quot;col 2&quot;,</span>
<span class="go"> &quot;type&quot;: &quot;string&quot;</span>
<span class="go"> }</span>
<span class="go"> ],</span>
<span class="go"> &quot;primaryKey&quot;: [</span>
<span class="go"> &quot;index&quot;</span>
<span class="go"> ],</span>
<span class="go"> &quot;pandas_version&quot;: &quot;0.20.0&quot;</span>
<span class="go"> },</span>
<span class="go"> &quot;data&quot;: [</span>
<span class="go"> {</span>
<span class="go"> &quot;index&quot;: &quot;row 1&quot;,</span>
<span class="go"> &quot;col 1&quot;: &quot;a&quot;,</span>
<span class="go"> &quot;col 2&quot;: &quot;b&quot;</span>
<span class="go"> },</span>
<span class="go"> {</span>
<span class="go"> &quot;index&quot;: &quot;row 2&quot;,</span>
<span class="go"> &quot;col 1&quot;: &quot;c&quot;,</span>
<span class="go"> &quot;col 2&quot;: &quot;d&quot;</span>
<span class="go"> }</span>
<span class="go"> ]</span>
<span class="go">}</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_latex">
<code class="descname">to_latex</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_latex" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_latex.html#pandas.DataFrame.to_latex" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_latex()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_latex’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_markdown">
<code class="descname">to_markdown</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_markdown" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_markdown.html#pandas.DataFrame.to_markdown" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_markdown()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_markdown’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_msgpack">
<code class="descname">to_msgpack</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_msgpack" title="Permalink to this definition"></a></dt>
<dd><p><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_msgpack()</span></code> is not supported in the Beam DataFrame API because it is deprecated in pandas.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_parquet">
<code class="descname">to_parquet</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_parquet" title="Permalink to this definition"></a></dt>
<dd><p>Write a DataFrame to the binary parquet format.</p>
<p>This function writes the dataframe as a <a class="reference external" href="https://parquet.apache.org/">parquet file</a>. You can choose different parquet
backends, and have the option of compression. See
<a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/io.html#io-parquet" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">the user guide</span></a> for more details.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>file-like object</em><em>, </em><em>default None</em>) – <p>If a string, it will be used as Root Directory path
when writing a partitioned dataset. By file-like object,
we refer to objects with a write() method, such as a file handle
(e.g. via builtin open function) or io.BytesIO. The engine
fastparquet does not accept file-like objects. If path is None,
a bytes object is returned.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.2.0.</span></p>
</div>
<p>Previously this was “fname”</p>
</li>
<li><strong>engine</strong> (<em>{'auto'</em><em>, </em><em>'pyarrow'</em><em>, </em><em>'fastparquet'}</em><em>, </em><em>default 'auto'</em>) – Parquet library to use. If ‘auto’, then the option
<code class="docutils literal notranslate"><span class="pre">io.parquet.engine</span></code> is used. The default <code class="docutils literal notranslate"><span class="pre">io.parquet.engine</span></code>
behavior is to try ‘pyarrow’, falling back to ‘fastparquet’ if
‘pyarrow’ is unavailable.</li>
<li><strong>compression</strong> (<em>{'snappy'</em><em>, </em><em>'gzip'</em><em>, </em><em>'brotli'</em><em>, </em><em>None}</em><em>, </em><em>default 'snappy'</em>) – Name of the compression to use. Use <code class="docutils literal notranslate"><span class="pre">None</span></code> for no compression.</li>
<li><strong>index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default None</em>) – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, include the dataframe’s index(es) in the file output.
If <code class="docutils literal notranslate"><span class="pre">False</span></code>, they will not be written to the file.
If <code class="docutils literal notranslate"><span class="pre">None</span></code>, similar to <code class="docutils literal notranslate"><span class="pre">True</span></code> the dataframe’s index(es)
will be saved. However, instead of being saved as values,
the RangeIndex will be stored as a range in the metadata so it
doesn’t require much space and is faster. Other indexes will
be included as columns in the file output.</li>
<li><strong>partition_cols</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><em>optional</em><em>, </em><em>default None</em>) – Column names by which to partition the dataset.
Columns are partitioned in the order they are given.
Must be None if path is not a string.</li>
<li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib</span></code> as header options. For other URLs (e.g.
starting with “s3://”, and “gcs://”) the key-value pairs are forwarded to
<code class="docutils literal notranslate"><span class="pre">fsspec</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more details.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.2.0.</span></p>
</div>
</li>
<li><strong>**kwargs</strong> – Additional arguments passed to the parquet library. See
<a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/io.html#io-parquet" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">pandas io</span></a> for more details.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first"></p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">bytes if no path argument is provided else None</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">read_parquet()</span></code></dt>
<dd>Read a parquet file.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_csv" title="apache_beam.dataframe.frames.DeferredDataFrame.to_csv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.to_csv()</span></code></a></dt>
<dd>Write a csv file.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_sql" title="apache_beam.dataframe.frames.DeferredDataFrame.to_sql"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.to_sql()</span></code></a></dt>
<dd>Write to a sql table.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_hdf" title="apache_beam.dataframe.frames.DeferredDataFrame.to_hdf"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.to_hdf()</span></code></a></dt>
<dd>Write to hdf.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>This function requires either the <a class="reference external" href="https://pypi.org/project/fastparquet">fastparquet</a> or <a class="reference external" href="https://arrow.apache.org/docs/python/">pyarrow</a> library.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;col1&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="s1">&#39;col2&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_parquet</span><span class="p">(</span><span class="s1">&#39;df.parquet.gzip&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="n">compression</span><span class="o">=</span><span class="s1">&#39;gzip&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">pd</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="s1">&#39;df.parquet.gzip&#39;</span><span class="p">)</span>
<span class="go"> col1 col2</span>
<span class="go">0 1 3</span>
<span class="go">1 2 4</span>
<span class="go">If you want to get a buffer to the parquet content you can use an io.BytesIO</span>
<span class="go">object, as long as you don&#39;t use partition_cols, which creates multiple files.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="kn">import</span> <span class="nn">io</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">f</span> <span class="o">=</span> <span class="n">io</span><span class="o">.</span><span class="n">BytesIO</span><span class="p">()</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_parquet</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">f</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="go">0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">content</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_period">
<code class="descname">to_period</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_period" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_period.html#pandas.DataFrame.to_period" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_period()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_period’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_pickle">
<code class="descname">to_pickle</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_pickle" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_pickle.html#pandas.DataFrame.to_pickle" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_pickle()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_pickle’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_sql">
<code class="descname">to_sql</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_sql" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_sql.html#pandas.DataFrame.to_sql" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_sql()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_sql’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_stata">
<code class="descname">to_stata</code><span class="sig-paren">(</span><em>path</em>, <em>*args</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_stata" title="Permalink to this definition"></a></dt>
<dd><p>Export DataFrame object to Stata dta format.</p>
<p>Writes the DataFrame to a Stata dataset file.
“dta” files contain a Stata dataset.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>buffer</em><em> or </em><em>path object</em>) – <p>String, path object (pathlib.Path or py._path.local.LocalPath) or
object implementing a binary write() function. If using a buffer
then the buffer will not be automatically closed after the file
data has been written.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.0.0.</span></p>
</div>
<p>Previously this was “fname”</p>
</li>
<li><strong>convert_dates</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a>) – Dictionary mapping columns containing datetime types to stata
internal format to use when writing the dates. Options are ‘tc’,
‘td’, ‘tm’, ‘tw’, ‘th’, ‘tq’, ‘ty’. Column can be either an integer
or a name. Datetime columns that do not have a conversion type
specified will be converted to ‘tc’. Raises NotImplementedError if
a datetime column has timezone information.</li>
<li><strong>write_index</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a>) – Write the index to Stata dataset.</li>
<li><strong>byteorder</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a>) – Can be “&gt;”, “&lt;”, “little”, or “big”. default is <cite>sys.byteorder</cite>.</li>
<li><strong>time_stamp</strong> (<em>datetime</em>) – A datetime to use as file creation date. Default is the current
time.</li>
<li><strong>data_label</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>optional</em>) – A label for the data set. Must be 80 characters or smaller.</li>
<li><strong>variable_labels</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a>) – Dictionary containing columns as keys and variable labels as
values. Each label must be 80 characters or smaller.</li>
<li><strong>version</strong> (<em>{114</em><em>, </em><em>117</em><em>, </em><em>118</em><em>, </em><em>119</em><em>, </em><em>None}</em><em>, </em><em>default 114</em>) – <p>Version to use in the output dta file. Set to None to let pandas
decide between 118 or 119 formats depending on the number of
columns in the frame. pandas Version 114 can be read by Stata 10 and
later. pandas Version 117 can be read by Stata 13 or later. pandas Version 118
is supported in Stata 14 and later. pandas Version 119 is supported in
Stata 15 and later. pandas Version 114 limits string variables to 244
characters or fewer while versions 117 and later allow strings
with lengths up to 2,000,000 characters. Versions 118 and 119
support Unicode characters, and pandas version 119 supports more than
32,767 variables.</p>
<p>pandas Version 119 should usually only be used when the number of
variables exceeds the capacity of dta format 118. Exporting
smaller datasets in format 119 may have unintended consequences,
and, as of November 2020, Stata SE cannot read pandas version 119 files.</p>
<div class="versionchanged">
<p><span class="versionmodified">Changed in version 1.0.0: </span>Added support for formats 118 and 119.</p>
</div>
</li>
<li><strong>convert_strl</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.10)"><em>list</em></a><em>, </em><em>optional</em>) – List of column names to convert to string columns to Stata StrL
format. Only available if version is 117. Storing strings in the
StrL format can produce smaller dta files if strings have more than
8 characters and values are repeated.</li>
<li><strong>compression</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>default 'infer'</em>) – <p>For on-the-fly compression of the output dta. If string, specifies
compression mode. If dict, value at key ‘method’ specifies
compression mode. Compression mode must be one of {‘infer’, ‘gzip’,
‘bz2’, ‘zip’, ‘xz’, None}. If compression mode is ‘infer’ and
<cite>path</cite> is path-like, then detect compression from the following
extensions: ‘.gz’, ‘.bz2’, ‘.zip’, or ‘.xz’ (otherwise no
compression). If dict and compression mode is one of {‘zip’,
‘gzip’, ‘bz2’}, or inferred as one of the above, other entries
passed as additional compression options.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.1.0.</span></p>
</div>
</li>
<li><strong>storage_options</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.10)"><em>dict</em></a><em>, </em><em>optional</em>) – <p>Extra options that make sense for a particular storage connection, e.g.
host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
are forwarded to <code class="docutils literal notranslate"><span class="pre">urllib</span></code> as header options. For other URLs (e.g.
starting with “s3://”, and “gcs://”) the key-value pairs are forwarded to
<code class="docutils literal notranslate"><span class="pre">fsspec</span></code>. Please see <code class="docutils literal notranslate"><span class="pre">fsspec</span></code> and <code class="docutils literal notranslate"><span class="pre">urllib</span></code> for more details.</p>
<div class="versionadded">
<p><span class="versionmodified">New in version 1.2.0.</span></p>
</div>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><ul class="first last">
<li><p class="first"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#NotImplementedError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">NotImplementedError</span></code></a> –</p>
<ul class="simple">
<li>If datetimes contain timezone information</li>
<li>Column dtype is not representable in Stata</li>
</ul>
</li>
<li><p class="first"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#ValueError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">ValueError</span></code></a> –</p>
<ul class="simple">
<li>Columns listed in convert_dates are neither datetime64[ns] nor datetime.datetime</li>
<li>Column listed in convert_dates is not in DeferredDataFrame</li>
<li>Categorical label contains more than 32,000 characters</li>
</ul>
</li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">read_stata()</span></code></dt>
<dd>Import Stata data files.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">io.stata.StataWriter()</span></code></dt>
<dd>Low-level writer for Stata data files.</dd>
<dt><code class="xref py py-meth docutils literal notranslate"><span class="pre">io.stata.StataWriter117()</span></code></dt>
<dd>Low-level writer for Stata format version 117 files.</dd>
</dl>
</div>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;animal&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;falcon&#39;</span><span class="p">,</span> <span class="s1">&#39;parrot&#39;</span><span class="p">,</span> <span class="s1">&#39;falcon&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;parrot&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;speed&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">350</span><span class="p">,</span> <span class="mi">18</span><span class="p">,</span> <span class="mi">361</span><span class="p">,</span> <span class="mi">15</span><span class="p">]})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">to_stata</span><span class="p">(</span><span class="s1">&#39;animals.dta&#39;</span><span class="p">)</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_timestamp">
<code class="descname">to_timestamp</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_timestamp" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_timestamp.html#pandas.DataFrame.to_timestamp" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_timestamp()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_timestamp’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_xarray">
<code class="descname">to_xarray</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_xarray" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_xarray.html#pandas.DataFrame.to_xarray" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_xarray()</span></code></a> is not yet supported in the Beam DataFrame API because it produces an output type that is not deferred.</p>
<p>For more information see <a class="reference external" href="https://s.apache.org/dataframe-non-deferred-result">https://s.apache.org/dataframe-non-deferred-result</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.to_xml">
<code class="descname">to_xml</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.to_xml" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.to_xml.html#pandas.DataFrame.to_xml" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.to_xml()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘to_xml’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.transform">
<code class="descname">transform</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.transform" title="Permalink to this definition"></a></dt>
<dd><p>Call <code class="docutils literal notranslate"><span class="pre">func</span></code> on self producing a DataFrame with transformed values.</p>
<p>Produced DataFrame will have same axis length as self.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>func</strong> (<em>function</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>list-like</em><em> or </em><em>dict-like</em>) – <p>Function to use for transforming the data. If a function, must either
work when passed a DeferredDataFrame or when passed to DeferredDataFrame.apply. If func
is both list-like and dict-like, dict-like behavior takes precedence.</p>
<p>Accepted combinations are:</p>
<ul>
<li>function</li>
<li>string function name</li>
<li>list-like of functions and/or function names, e.g. <code class="docutils literal notranslate"><span class="pre">[np.exp,</span> <span class="pre">'sqrt']</span></code></li>
<li>dict-like of axis labels -&gt; functions, function names or list-like of such.</li>
</ul>
</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – If 0 or ‘index’: apply function to each column.
If 1 or ‘columns’: apply function to each row.</li>
<li><strong>*args</strong> – Positional arguments to pass to <cite>func</cite>.</li>
<li><strong>**kwargs</strong> – Keyword arguments to pass to <cite>func</cite>.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">A DeferredDataFrame that must have the same length as self.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last">ValueError : If the returned DeferredDataFrame has a different length than self.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.agg" title="apache_beam.dataframe.frames.DeferredDataFrame.agg"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.agg()</span></code></a></dt>
<dd>Only perform aggregating type operations.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.apply" title="apache_beam.dataframe.frames.DeferredDataFrame.apply"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.apply()</span></code></a></dt>
<dd>Invoke function on a DeferredDataFrame.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Functions that mutate the passed object can produce unexpected
behavior or errors and are not supported. See <a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/gotchas.html#gotchas-udf-mutation" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span>Mutating with User Defined Function (UDF) methods</span></a>
for more details.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">3</span><span class="p">),</span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4</span><span class="p">)})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B</span>
<span class="go">0 0 1</span>
<span class="go">1 1 2</span>
<span class="go">2 2 3</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span> <span class="o">+</span> <span class="mi">1</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">0 1 2</span>
<span class="go">1 2 3</span>
<span class="go">2 3 4</span>
</pre></div>
</div>
<p>Even though the resulting DataFrame must have the same length as the
input DataFrame, it is possible to provide several input functions:</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">3</span><span class="p">))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span>
<span class="go">0 0</span>
<span class="go">1 1</span>
<span class="go">2 2</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">transform</span><span class="p">([</span><span class="n">np</span><span class="o">.</span><span class="n">sqrt</span><span class="p">,</span> <span class="n">np</span><span class="o">.</span><span class="n">exp</span><span class="p">])</span>
<span class="go"> sqrt exp</span>
<span class="go">0 0.000000 1.000000</span>
<span class="go">1 1.000000 2.718282</span>
<span class="go">2 1.414214 7.389056</span>
</pre></div>
</div>
<p>You can call transform on a GroupBy object:</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span>
<span class="gp">... </span> <span class="s2">&quot;Date&quot;</span><span class="p">:</span> <span class="p">[</span>
<span class="gp">... </span> <span class="s2">&quot;2015-05-08&quot;</span><span class="p">,</span> <span class="s2">&quot;2015-05-07&quot;</span><span class="p">,</span> <span class="s2">&quot;2015-05-06&quot;</span><span class="p">,</span> <span class="s2">&quot;2015-05-05&quot;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s2">&quot;2015-05-08&quot;</span><span class="p">,</span> <span class="s2">&quot;2015-05-07&quot;</span><span class="p">,</span> <span class="s2">&quot;2015-05-06&quot;</span><span class="p">,</span> <span class="s2">&quot;2015-05-05&quot;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;Data&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">5</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">6</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">50</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">60</span><span class="p">,</span> <span class="mi">120</span><span class="p">],</span>
<span class="gp">... </span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> Date Data</span>
<span class="go">0 2015-05-08 5</span>
<span class="go">1 2015-05-07 8</span>
<span class="go">2 2015-05-06 6</span>
<span class="go">3 2015-05-05 1</span>
<span class="go">4 2015-05-08 50</span>
<span class="go">5 2015-05-07 100</span>
<span class="go">6 2015-05-06 60</span>
<span class="go">7 2015-05-05 120</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">&#39;Date&#39;</span><span class="p">)[</span><span class="s1">&#39;Data&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="s1">&#39;sum&#39;</span><span class="p">)</span>
<span class="go">0 55</span>
<span class="go">1 108</span>
<span class="go">2 66</span>
<span class="go">3 121</span>
<span class="go">4 55</span>
<span class="go">5 108</span>
<span class="go">6 66</span>
<span class="go">7 121</span>
<span class="go">Name: Data, dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span>
<span class="gp">... </span> <span class="s2">&quot;c&quot;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span>
<span class="gp">... </span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="p">[</span><span class="s2">&quot;m&quot;</span><span class="p">,</span> <span class="s2">&quot;n&quot;</span><span class="p">,</span> <span class="s2">&quot;o&quot;</span><span class="p">,</span> <span class="s2">&quot;m&quot;</span><span class="p">,</span> <span class="s2">&quot;m&quot;</span><span class="p">,</span> <span class="s2">&quot;n&quot;</span><span class="p">,</span> <span class="s2">&quot;n&quot;</span><span class="p">]</span>
<span class="gp">... </span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> c type</span>
<span class="go">0 1 m</span>
<span class="go">1 1 n</span>
<span class="go">2 1 o</span>
<span class="go">3 2 m</span>
<span class="go">4 2 m</span>
<span class="go">5 2 n</span>
<span class="go">6 2 n</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;size&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">groupby</span><span class="p">(</span><span class="s1">&#39;c&#39;</span><span class="p">)[</span><span class="s1">&#39;type&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">transform</span><span class="p">(</span><span class="nb">len</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> c type size</span>
<span class="go">0 1 m 3</span>
<span class="go">1 1 n 3</span>
<span class="go">2 1 o 3</span>
<span class="go">3 2 m 4</span>
<span class="go">4 2 m 4</span>
<span class="go">5 2 n 4</span>
<span class="go">6 2 n 4</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.truediv">
<code class="descname">truediv</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="Permalink to this definition"></a></dt>
<dd><p>Get Floating division of dataframe and other, element-wise (binary operator <cite>truediv</cite>).</p>
<p>Equivalent to <code class="docutils literal notranslate"><span class="pre">dataframe</span> <span class="pre">/</span> <span class="pre">other</span></code>, but with support to substitute a fill_value
for missing data in one of the inputs. The reverse version is <cite>rtruediv</cite>.</p>
<p>This is one of the flexible wrappers (<cite>add</cite>, <cite>sub</cite>, <cite>mul</cite>, <cite>div</cite>, <cite>mod</cite>, <cite>pow</cite>) around the
arithmetic operators: <cite>+</cite>, <cite>-</cite>, <cite>*</cite>, <cite>/</cite>, <cite>//</cite>, <cite>%</cite>, <cite>**</cite>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>other</strong> (<em>scalar</em><em>, </em><em>sequence</em><em>, </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries"><em>DeferredSeries</em></a><em>, or </em><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame"><em>DeferredDataFrame</em></a>) – Any single or multiple element data structure, or list-like object.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em>) – Whether to compare by the index (0 or ‘index’) or columns
(1 or ‘columns’). For DeferredSeries input, axis to match DeferredSeries index on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em> or </em><em>label</em>) – Broadcast across a level, matching Index values on the
passed MultiIndex level.</li>
<li><strong>fill_value</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.10)"><em>float</em></a><em> or </em><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.10)"><em>None</em></a><em>, </em><em>default None</em>) – Fill existing missing (NaN) values, and any new element needed for
successful DeferredDataFrame alignment, with this value before computation.
If data in both corresponding DeferredDataFrame locations is missing
the result will be missing.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Result of the arithmetic operation.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Only level=None is supported.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.add" title="apache_beam.dataframe.frames.DeferredDataFrame.add"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.add()</span></code></a></dt>
<dd>Add DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.sub" title="apache_beam.dataframe.frames.DeferredDataFrame.sub"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.sub()</span></code></a></dt>
<dd>Subtract DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mul" title="apache_beam.dataframe.frames.DeferredDataFrame.mul"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mul()</span></code></a></dt>
<dd>Multiply DeferredDataFrames.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.div" title="apache_beam.dataframe.frames.DeferredDataFrame.div"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.div()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.truediv" title="apache_beam.dataframe.frames.DeferredDataFrame.truediv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.truediv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (float division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.floordiv" title="apache_beam.dataframe.frames.DeferredDataFrame.floordiv"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.floordiv()</span></code></a></dt>
<dd>Divide DeferredDataFrames (integer division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.mod" title="apache_beam.dataframe.frames.DeferredDataFrame.mod"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.mod()</span></code></a></dt>
<dd>Calculate modulo (remainder after division).</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.pow" title="apache_beam.dataframe.frames.DeferredDataFrame.pow"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.pow()</span></code></a></dt>
<dd>Calculate exponential power.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>Mismatched indices will be unioned together.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 360</span>
<span class="go">triangle 3 180</span>
<span class="go">rectangle 4 360</span>
</pre></div>
</div>
<p>Add a scalar with the operator version, which returns the same
results.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">+</span> <span class="mi">1</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 1 361</span>
<span class="go">triangle 4 181</span>
<span class="go">rectangle 5 361</span>
</pre></div>
</div>
<p>Divide by a constant, with the reverse version.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0.0 36.0</span>
<span class="go">triangle 0.3 18.0</span>
<span class="go">rectangle 0.4 36.0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">rdiv</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle inf 0.027778</span>
<span class="go">triangle 3.333333 0.055556</span>
<span class="go">rectangle 2.500000 0.027778</span>
</pre></div>
</div>
<p>Subtract a list and a Series by axis with the operator version.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">-</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">]</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;columns&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 358</span>
<span class="go">triangle 2 178</span>
<span class="go">rectangle 3 358</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">]),</span>
<span class="gp">... </span> <span class="n">axis</span><span class="o">=</span><span class="s1">&#39;index&#39;</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle -1 359</span>
<span class="go">triangle 2 179</span>
<span class="go">rectangle 3 359</span>
</pre></div>
</div>
<p>Multiply a DataFrame of a different shape with the operator version.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">other</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">other</span>
<span class="go"> angles</span>
<span class="go">circle 0</span>
<span class="go">triangle 3</span>
<span class="go">rectangle 4</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">*</span> <span class="n">other</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 NaN</span>
<span class="go">triangle 9 NaN</span>
<span class="go">rectangle 16 NaN</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">mul</span><span class="p">(</span><span class="n">other</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">circle 0 0.0</span>
<span class="go">triangle 9 0.0</span>
<span class="go">rectangle 16 0.0</span>
</pre></div>
</div>
<p>Divide by a MultiIndex by level.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;angles&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;degrees&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">360</span><span class="p">,</span> <span class="mi">180</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">360</span><span class="p">,</span> <span class="mi">540</span><span class="p">,</span> <span class="mi">720</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[[</span><span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;A&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">,</span> <span class="s1">&#39;B&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="p">[</span><span class="s1">&#39;circle&#39;</span><span class="p">,</span> <span class="s1">&#39;triangle&#39;</span><span class="p">,</span> <span class="s1">&#39;rectangle&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;square&#39;</span><span class="p">,</span> <span class="s1">&#39;pentagon&#39;</span><span class="p">,</span> <span class="s1">&#39;hexagon&#39;</span><span class="p">]])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df_multindex</span>
<span class="go"> angles degrees</span>
<span class="go">A circle 0 360</span>
<span class="go"> triangle 3 180</span>
<span class="go"> rectangle 4 360</span>
<span class="go">B square 4 360</span>
<span class="go"> pentagon 5 540</span>
<span class="go"> hexagon 6 720</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">div</span><span class="p">(</span><span class="n">df_multindex</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">fill_value</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
<span class="go"> angles degrees</span>
<span class="go">A circle NaN 1.0</span>
<span class="go"> triangle 1.0 1.0</span>
<span class="go"> rectangle 1.0 1.0</span>
<span class="go">B square 0.0 0.0</span>
<span class="go"> pentagon 0.0 0.0</span>
<span class="go"> hexagon 0.0 0.0</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.truncate">
<code class="descname">truncate</code><span class="sig-paren">(</span><em>before</em>, <em>after</em>, <em>axis</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.truncate" title="Permalink to this definition"></a></dt>
<dd><p>Truncate a Series or DataFrame before and after some index value.</p>
<p>This is a useful shorthand for boolean indexing based on index
values above or below certain thresholds.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>before</strong> (<em>date</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – Truncate all rows before this index value.</li>
<li><strong>after</strong> (<em>date</em><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a>) – Truncate all rows after this index value.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>optional</em>) – Axis to truncate. Truncates the index (rows) by default.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default is True</em>) – Return a copy of the truncated section.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">The truncated DeferredSeries or DeferredDataFrame.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last">type of caller</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.loc" title="apache_beam.dataframe.frames.DeferredDataFrame.loc"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.loc()</span></code></a></dt>
<dd>Select a subset of a DeferredDataFrame by label.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.iloc" title="apache_beam.dataframe.frames.DeferredDataFrame.iloc"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.iloc()</span></code></a></dt>
<dd>Select a subset of a DeferredDataFrame by position.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p>If the index being truncated contains only datetime values,
<cite>before</cite> and <cite>after</cite> may be specified as strings instead of
Timestamps.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;a&#39;</span><span class="p">,</span> <span class="s1">&#39;b&#39;</span><span class="p">,</span> <span class="s1">&#39;c&#39;</span><span class="p">,</span> <span class="s1">&#39;d&#39;</span><span class="p">,</span> <span class="s1">&#39;e&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;B&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;f&#39;</span><span class="p">,</span> <span class="s1">&#39;g&#39;</span><span class="p">,</span> <span class="s1">&#39;h&#39;</span><span class="p">,</span> <span class="s1">&#39;i&#39;</span><span class="p">,</span> <span class="s1">&#39;j&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;C&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;k&#39;</span><span class="p">,</span> <span class="s1">&#39;l&#39;</span><span class="p">,</span> <span class="s1">&#39;m&#39;</span><span class="p">,</span> <span class="s1">&#39;n&#39;</span><span class="p">,</span> <span class="s1">&#39;o&#39;</span><span class="p">]},</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> A B C</span>
<span class="go">1 a f k</span>
<span class="go">2 b g l</span>
<span class="go">3 c h m</span>
<span class="go">4 d i n</span>
<span class="go">5 e j o</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">truncate</span><span class="p">(</span><span class="n">before</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">after</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go"> A B C</span>
<span class="go">2 b g l</span>
<span class="go">3 c h m</span>
<span class="go">4 d i n</span>
<span class="go">The columns of a DataFrame can be truncated.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">truncate</span><span class="p">(</span><span class="n">before</span><span class="o">=</span><span class="s2">&quot;A&quot;</span><span class="p">,</span> <span class="n">after</span><span class="o">=</span><span class="s2">&quot;B&quot;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="s2">&quot;columns&quot;</span><span class="p">)</span>
<span class="go"> A B</span>
<span class="go">1 a f</span>
<span class="go">2 b g</span>
<span class="go">3 c h</span>
<span class="go">4 d i</span>
<span class="go">5 e j</span>
<span class="go">For Series, only rows can be truncated.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="p">[</span><span class="s1">&#39;A&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">truncate</span><span class="p">(</span><span class="n">before</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">after</span><span class="o">=</span><span class="mi">4</span><span class="p">)</span>
<span class="go">2 b</span>
<span class="go">3 c</span>
<span class="go">4 d</span>
<span class="go">Name: A, dtype: object</span>
<span class="go">The index values in ``truncate`` can be datetimes or string</span>
<span class="go">dates.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">dates</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">date_range</span><span class="p">(</span><span class="s1">&#39;2016-01-01&#39;</span><span class="p">,</span> <span class="s1">&#39;2016-02-01&#39;</span><span class="p">,</span> <span class="n">freq</span><span class="o">=</span><span class="s1">&#39;s&#39;</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">index</span><span class="o">=</span><span class="n">dates</span><span class="p">,</span> <span class="n">data</span><span class="o">=</span><span class="p">{</span><span class="s1">&#39;A&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">})</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">tail</span><span class="p">()</span>
<span class="go"> A</span>
<span class="go">2016-01-31 23:59:56 1</span>
<span class="go">2016-01-31 23:59:57 1</span>
<span class="go">2016-01-31 23:59:58 1</span>
<span class="go">2016-01-31 23:59:59 1</span>
<span class="go">2016-02-01 00:00:00 1</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">truncate</span><span class="p">(</span><span class="n">before</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;2016-01-05&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">after</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">Timestamp</span><span class="p">(</span><span class="s1">&#39;2016-01-10&#39;</span><span class="p">))</span><span class="o">.</span><span class="n">tail</span><span class="p">()</span>
<span class="go"> A</span>
<span class="go">2016-01-09 23:59:56 1</span>
<span class="go">2016-01-09 23:59:57 1</span>
<span class="go">2016-01-09 23:59:58 1</span>
<span class="go">2016-01-09 23:59:59 1</span>
<span class="go">2016-01-10 00:00:00 1</span>
<span class="go">Because the index is a DatetimeIndex containing only dates, we can</span>
<span class="go">specify `before` and `after` as strings. They will be coerced to</span>
<span class="go">Timestamps before truncation.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">truncate</span><span class="p">(</span><span class="s1">&#39;2016-01-05&#39;</span><span class="p">,</span> <span class="s1">&#39;2016-01-10&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">tail</span><span class="p">()</span>
<span class="go"> A</span>
<span class="go">2016-01-09 23:59:56 1</span>
<span class="go">2016-01-09 23:59:57 1</span>
<span class="go">2016-01-09 23:59:58 1</span>
<span class="go">2016-01-09 23:59:59 1</span>
<span class="go">2016-01-10 00:00:00 1</span>
<span class="go">Note that ``truncate`` assumes a 0 value for any unspecified time</span>
<span class="go">component (midnight). This differs from partial string slicing, which</span>
<span class="go">returns any partially matching dates.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">loc</span><span class="p">[</span><span class="s1">&#39;2016-01-05&#39;</span><span class="p">:</span><span class="s1">&#39;2016-01-10&#39;</span><span class="p">,</span> <span class="p">:]</span><span class="o">.</span><span class="n">tail</span><span class="p">()</span>
<span class="go"> A</span>
<span class="go">2016-01-10 23:59:55 1</span>
<span class="go">2016-01-10 23:59:56 1</span>
<span class="go">2016-01-10 23:59:57 1</span>
<span class="go">2016-01-10 23:59:58 1</span>
<span class="go">2016-01-10 23:59:59 1</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.tshift">
<code class="descname">tshift</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.tshift" title="Permalink to this definition"></a></dt>
<dd><p><a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.DataFrame.tshift.html#pandas.DataFrame.tshift" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><code class="xref py py-meth docutils literal notranslate"><span class="pre">pandas.DataFrame.tshift()</span></code></a> is not implemented yet in the Beam DataFrame API.</p>
<p>If support for ‘tshift’ is important to you, please let the Beam community know by <a class="reference external" href="https://beam.apache.org/community/contact-us/">writing to user&#64;beam.apache.org</a> or commenting on <a class="reference external" href="https://issues.apache.org/jira/browse/BEAM-9547">BEAM-9547</a>.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.tz_convert">
<code class="descname">tz_convert</code><span class="sig-paren">(</span><em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.tz_convert" title="Permalink to this definition"></a></dt>
<dd><p>Convert tz-aware axis to target time zone.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>tz</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>tzinfo object</em>) – </li>
<li><strong>axis</strong> (<em>the axis to convert</em>) – </li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default None</em>) – If axis is a MultiIndex, convert a specific level. Otherwise
must be None.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Also make a copy of the underlying data.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Object with time zone converted axis.</p>
</td>
</tr>
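<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>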
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> – If the axis is tz-naive.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>This operation has no known divergences from the pandas API.</p>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.tz_localize">
<code class="descname">tz_localize</code><span class="sig-paren">(</span><em>ambiguous</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.tz_localize" title="Permalink to this definition"></a></dt>
<dd><p>Localize tz-naive index of a Series or DataFrame to target time zone.</p>
<p>This operation localizes the Index. To localize the values in a
timezone-naive Series, use <code class="xref py py-meth docutils literal notranslate"><span class="pre">Series.dt.tz_localize()</span></code>.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>tz</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em> or </em><em>tzinfo</em>) – </li>
<li><strong>axis</strong> (<em>the axis to localize</em>) – </li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.10)"><em>int</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default None</em>) – If axis is a MultiIndex, localize a specific level. Otherwise
must be None.</li>
<li><strong>copy</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – Also make a copy of the underlying data.</li>
<li><strong>ambiguous</strong> (<em>'infer'</em><em>, </em><em>bool-ndarray</em><em>, </em><em>'NaT'</em><em>, </em><em>default 'raise'</em>) – <p>When clocks moved backward due to DST, ambiguous times may arise.
For example in Central European Time (UTC+01), when going from
03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
<cite>ambiguous</cite> parameter dictates how ambiguous times should be
handled.</p>
<ul>
<li>’infer’ will attempt to infer fall dst-transition hours based on
order</li>
<li>bool-ndarray where True signifies a DST time, False designates
a non-DST time (note that this flag is only applicable for
ambiguous times)</li>
<li>’NaT’ will return NaT where there are ambiguous times</li>
<li>’raise’ will raise an AmbiguousTimeError if there are ambiguous
times.</li>
</ul>
</li>
<li><strong>nonexistent</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.10)"><em>str</em></a><em>, </em><em>default 'raise'</em>) – <p>A nonexistent time does not exist in a particular timezone
where clocks moved forward due to DST. Valid values are:</p>
<ul>
<li>’shift_forward’ will shift the nonexistent time forward to the
closest existing time</li>
<li>’shift_backward’ will shift the nonexistent time backward to the
closest existing time</li>
<li>’NaT’ will return NaT where there are nonexistent times</li>
<li>timedelta objects will shift nonexistent times by the timedelta</li>
<li>’raise’ will raise a NonExistentTimeError if there are
nonexistent times.</li>
</ul>
</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Same type as the input.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Raises:</th><td class="field-body"><p class="first last"><a class="reference external" href="https://docs.python.org/3/library/exceptions.html#TypeError" title="(in Python v3.10)"><code class="xref py py-exc docutils literal notranslate"><span class="pre">TypeError</span></code></a> – If the time series is tz-aware and tz is not None.</p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p><code class="docutils literal notranslate"><span class="pre">ambiguous</span></code> cannot be set to <code class="docutils literal notranslate"><span class="pre">&quot;infer&quot;</span></code> as its semantics are
order-sensitive. Similarly, specifying <code class="docutils literal notranslate"><span class="pre">ambiguous</span></code> as an
<a class="reference external" href="https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html#numpy.ndarray" title="(in NumPy v1.22)"><code class="xref py py-class docutils literal notranslate"><span class="pre">ndarray</span></code></a> is order-sensitive, but you can achieve similar
functionality by specifying <code class="docutils literal notranslate"><span class="pre">ambiguous</span></code> as a Series.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported, see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="go">Localize local times:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">([</span><span class="mi">1</span><span class="p">],</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">DatetimeIndex</span><span class="p">([</span><span class="s1">&#39;2018-09-15 01:30:00&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">tz_localize</span><span class="p">(</span><span class="s1">&#39;CET&#39;</span><span class="p">)</span>
<span class="go">2018-09-15 01:30:00+02:00 1</span>
<span class="go">dtype: int64</span>
<span class="go">Be careful with DST changes. When there is sequential data, pandas</span>
<span class="go">can infer the DST time:</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">7</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">DatetimeIndex</span><span class="p">([</span><span class="s1">&#39;2018-10-28 01:30:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 02:00:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 02:30:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 02:00:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 02:30:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 03:00:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 03:30:00&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">tz_localize</span><span class="p">(</span><span class="s1">&#39;CET&#39;</span><span class="p">,</span> <span class="n">ambiguous</span><span class="o">=</span><span class="s1">&#39;infer&#39;</span><span class="p">)</span>
<span class="go">2018-10-28 01:30:00+02:00 0</span>
<span class="go">2018-10-28 02:00:00+02:00 1</span>
<span class="go">2018-10-28 02:30:00+02:00 2</span>
<span class="go">2018-10-28 02:00:00+01:00 3</span>
<span class="go">2018-10-28 02:30:00+01:00 4</span>
<span class="go">2018-10-28 03:00:00+01:00 5</span>
<span class="go">2018-10-28 03:30:00+01:00 6</span>
<span class="go">dtype: int64</span>
<span class="go">In some cases, inferring the DST is impossible. In such cases, you can</span>
<span class="go">pass an ndarray to the ambiguous parameter to set the DST explicitly</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">3</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">DatetimeIndex</span><span class="p">([</span><span class="s1">&#39;2018-10-28 01:20:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 02:36:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2018-10-28 03:46:00&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">tz_localize</span><span class="p">(</span><span class="s1">&#39;CET&#39;</span><span class="p">,</span> <span class="n">ambiguous</span><span class="o">=</span><span class="n">np</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="kc">True</span><span class="p">,</span> <span class="kc">True</span><span class="p">,</span> <span class="kc">False</span><span class="p">]))</span>
<span class="go">2018-10-28 01:20:00+02:00 0</span>
<span class="go">2018-10-28 02:36:00+02:00 1</span>
<span class="go">2018-10-28 03:46:00+01:00 2</span>
<span class="go">dtype: int64</span>
<span class="go">If the DST transition causes nonexistent times, you can shift these</span>
<span class="go">dates forward or backward with a timedelta object or `&#39;shift_forward&#39;`</span>
<span class="go">or `&#39;shift_backward&#39;`.</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">Series</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="mi">2</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">index</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">DatetimeIndex</span><span class="p">([</span><span class="s1">&#39;2015-03-29 02:30:00&#39;</span><span class="p">,</span>
<span class="gp">... </span> <span class="s1">&#39;2015-03-29 03:30:00&#39;</span><span class="p">]))</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">tz_localize</span><span class="p">(</span><span class="s1">&#39;Europe/Warsaw&#39;</span><span class="p">,</span> <span class="n">nonexistent</span><span class="o">=</span><span class="s1">&#39;shift_forward&#39;</span><span class="p">)</span>
<span class="go">2015-03-29 03:00:00+02:00 0</span>
<span class="go">2015-03-29 03:30:00+02:00 1</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">tz_localize</span><span class="p">(</span><span class="s1">&#39;Europe/Warsaw&#39;</span><span class="p">,</span> <span class="n">nonexistent</span><span class="o">=</span><span class="s1">&#39;shift_backward&#39;</span><span class="p">)</span>
<span class="go">2015-03-29 01:59:59.999999999+01:00 0</span>
<span class="go">2015-03-29 03:30:00+02:00 1</span>
<span class="go">dtype: int64</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">s</span><span class="o">.</span><span class="n">tz_localize</span><span class="p">(</span><span class="s1">&#39;Europe/Warsaw&#39;</span><span class="p">,</span> <span class="n">nonexistent</span><span class="o">=</span><span class="n">pd</span><span class="o">.</span><span class="n">Timedelta</span><span class="p">(</span><span class="s1">&#39;1H&#39;</span><span class="p">))</span>
<span class="go">2015-03-29 03:30:00+02:00 0</span>
<span class="go">2015-03-29 03:30:00+02:00 1</span>
<span class="go">dtype: int64</span>
</pre></div>
</div>
</dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.where">
<code class="descname">where</code><span class="sig-paren">(</span><em>cond</em>, <em>other</em>, <em>errors</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.where" title="Permalink to this definition"></a></dt>
<dd><p>where is not parallelizable when <code class="docutils literal notranslate"><span class="pre">errors=&quot;ignore&quot;</span></code> is specified.</p>
</dd></dl>
<dl class="classmethod">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.wrap">
<em class="property">classmethod </em><code class="descname">wrap</code><span class="sig-paren">(</span><em>expr</em>, <em>split_tuples=True</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.wrap" title="Permalink to this definition"></a></dt>
<dd></dd></dl>
<dl class="method">
<dt id="apache_beam.dataframe.frames.DeferredDataFrame.xs">
<code class="descname">xs</code><span class="sig-paren">(</span><em>key</em>, <em>axis</em>, <em>level</em>, <em>**kwargs</em><span class="sig-paren">)</span><a class="headerlink" href="#apache_beam.dataframe.frames.DeferredDataFrame.xs" title="Permalink to this definition"></a></dt>
<dd><p>Return cross-section from the Series/DataFrame.</p>
<p>This method takes a <cite>key</cite> argument to select data at a particular
level of a MultiIndex.</p>
<table class="docutils field-list" frame="void" rules="none">
<col class="field-name" />
<col class="field-body" />
<tbody valign="top">
<tr class="field-odd field"><th class="field-name">Parameters:</th><td class="field-body"><ul class="first simple">
<li><strong>key</strong> (<em>label</em><em> or </em><em>tuple of label</em>) – Label contained in the index, or partially in a MultiIndex.</li>
<li><strong>axis</strong> (<em>{0</em><em> or </em><em>'index'</em><em>, </em><em>1</em><em> or </em><em>'columns'}</em><em>, </em><em>default 0</em>) – Axis to retrieve cross-section on.</li>
<li><strong>level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#object" title="(in Python v3.10)"><em>object</em></a><em>, </em><em>defaults to first n levels</em><em> (</em><em>n=1</em><em> or </em><em>len</em><em>(</em><em>key</em><em>)</em><em>)</em>) – In case of a key partially contained in a MultiIndex, indicate
which levels are used. Levels can be referred by label or position.</li>
<li><strong>drop_level</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.10)"><em>bool</em></a><em>, </em><em>default True</em>) – If False, returns object with same levels as self.</li>
</ul>
</td>
</tr>
<tr class="field-even field"><th class="field-name">Returns:</th><td class="field-body"><p class="first">Cross-section from the original DeferredSeries or DeferredDataFrame
corresponding to the selected index levels.</p>
</td>
</tr>
<tr class="field-odd field"><th class="field-name">Return type:</th><td class="field-body"><p class="first last"><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredSeries" title="apache_beam.dataframe.frames.DeferredSeries">DeferredSeries</a> or <a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame" title="apache_beam.dataframe.frames.DeferredDataFrame">DeferredDataFrame</a></p>
</td>
</tr>
</tbody>
</table>
<p class="rubric">Differences from pandas</p>
<p>Note that <code class="docutils literal notranslate"><span class="pre">xs(axis='index')</span></code> will raise a <code class="docutils literal notranslate"><span class="pre">KeyError</span></code> at execution
time if the key does not exist in the index.</p>
<div class="admonition seealso">
<p class="first admonition-title">See also</p>
<dl class="last docutils">
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.loc" title="apache_beam.dataframe.frames.DeferredDataFrame.loc"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.loc()</span></code></a></dt>
<dd>Access a group of rows and columns by label(s) or a boolean array.</dd>
<dt><a class="reference internal" href="#apache_beam.dataframe.frames.DeferredDataFrame.iloc" title="apache_beam.dataframe.frames.DeferredDataFrame.iloc"><code class="xref py py-meth docutils literal notranslate"><span class="pre">DeferredDataFrame.iloc()</span></code></a></dt>
<dd>Purely integer-location based indexing for selection by position.</dd>
</dl>
</div>
<p class="rubric">Notes</p>
<p><cite>xs</cite> cannot be used to set values.</p>
<p>MultiIndex Slicers is a generic way to get/set values on
any level or levels.
It is a superset of <cite>xs</cite> functionality, see
<a class="reference external" href="https://pandas.pydata.org/pandas-docs/dev/user_guide/advanced.html#advanced-mi-slicers" title="(in pandas v1.5.0.dev0+279.g7651c08230)"><span class="xref std std-ref">MultiIndex Slicers</span></a>.</p>
<p class="rubric">Examples</p>
<p><strong>NOTE:</strong> These examples are pulled directly from the pandas documentation for convenience. Usage of the Beam DataFrame API will look different because it is a deferred API. In addition, some arguments shown here may not be supported; see <strong>‘Differences from pandas’</strong> for details.</p>
<div class="highlight-pycon notranslate"><div class="highlight"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">d</span> <span class="o">=</span> <span class="p">{</span><span class="s1">&#39;num_legs&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;num_wings&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;class&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="s1">&#39;bird&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;animal&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;cat&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">,</span> <span class="s1">&#39;bat&#39;</span><span class="p">,</span> <span class="s1">&#39;penguin&#39;</span><span class="p">],</span>
<span class="gp">... </span> <span class="s1">&#39;locomotion&#39;</span><span class="p">:</span> <span class="p">[</span><span class="s1">&#39;walks&#39;</span><span class="p">,</span> <span class="s1">&#39;walks&#39;</span><span class="p">,</span> <span class="s1">&#39;flies&#39;</span><span class="p">,</span> <span class="s1">&#39;walks&#39;</span><span class="p">]}</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">(</span><span class="n">data</span><span class="o">=</span><span class="n">d</span><span class="p">)</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">set_index</span><span class="p">([</span><span class="s1">&#39;class&#39;</span><span class="p">,</span> <span class="s1">&#39;animal&#39;</span><span class="p">,</span> <span class="s1">&#39;locomotion&#39;</span><span class="p">])</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span>
<span class="go"> num_legs num_wings</span>
<span class="go">class animal locomotion</span>
<span class="go">mammal cat walks 4 0</span>
<span class="go"> dog walks 4 0</span>
<span class="go"> bat flies 2 2</span>
<span class="go">bird penguin walks 2 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1"># Get values at specified index</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;mammal&#39;</span><span class="p">)</span>
<span class="go"> num_legs num_wings</span>
<span class="go">animal locomotion</span>
<span class="go">cat walks 4 0</span>
<span class="go">dog walks 4 0</span>
<span class="go">bat flies 2 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1"># Get values at several indexes</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">xs</span><span class="p">((</span><span class="s1">&#39;mammal&#39;</span><span class="p">,</span> <span class="s1">&#39;dog&#39;</span><span class="p">))</span>
<span class="go"> num_legs num_wings</span>
<span class="go">locomotion</span>
<span class="go">walks 4 0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1"># Get values at specified index and level</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;cat&#39;</span><span class="p">,</span> <span class="n">level</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go"> num_legs num_wings</span>
<span class="go">class locomotion</span>
<span class="go">mammal walks 4 0</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1"># Get values at several indexes and levels</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">xs</span><span class="p">((</span><span class="s1">&#39;bird&#39;</span><span class="p">,</span> <span class="s1">&#39;walks&#39;</span><span class="p">),</span>
<span class="gp">... </span> <span class="n">level</span><span class="o">=</span><span class="p">[</span><span class="mi">0</span><span class="p">,</span> <span class="s1">&#39;locomotion&#39;</span><span class="p">])</span>
<span class="go"> num_legs num_wings</span>
<span class="go">animal</span>
<span class="go">penguin 2 2</span>
<span class="gp">&gt;&gt;&gt; </span><span class="c1"># Get values at specified column and axis</span>
<span class="gp">&gt;&gt;&gt; </span><span class="n">df</span><span class="o">.</span><span class="n">xs</span><span class="p">(</span><span class="s1">&#39;num_wings&#39;</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
<span class="go">class animal locomotion</span>
<span class="go">mammal cat walks 0</span>
<span class="go"> dog walks 0</span>
<span class="go"> bat flies 2</span>
<span class="go">bird penguin walks 2</span>
<span class="go">Name: num_wings, dtype: int64</span>
</pre></div>
</div>
</dd></dl>
</dd></dl>
</div>
</div>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="apache_beam.dataframe.io.html" class="btn btn-neutral float-right" title="apache_beam.dataframe.io module" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
<a href="apache_beam.dataframe.frame_base.html" class="btn btn-neutral float-left" title="apache_beam.dataframe.frame_base module" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
</div>
<hr/>
<div role="contentinfo">
<p>
&copy; Copyright
</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
// Once the DOM is ready, switch on the theme's navigation behaviour.
// NOTE(review): the `true` argument is presumably the sticky-navigation
// flag of the Read the Docs theme — confirm against _static/js/theme.js.
jQuery(document).ready(function () {
  var themeNavigation = SphinxRtdTheme.Navigation;
  themeNavigation.enable(true);
});
</script>
</body>
</html>